From e9b024537dcd17adf0435832e2bb4fa92d715146 Mon Sep 17 00:00:00 2001
From: Nhat Nguyen
Date: Tue, 14 Sep 2021 22:01:55 -0400
Subject: [PATCH 1/2] LUCENE-10106: Sort optimization wrongly skip first docs

---
 .../search/comparators/NumericComparator.java |  2 +-
 .../lucene/search/TestSortOptimization.java   | 84 +++++++++++++++++++
 2 files changed, 85 insertions(+), 1 deletion(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java
index 0455cea24dc1..051d9cc77c90 100644
--- a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java
@@ -84,7 +84,7 @@ public abstract class NumericLeafComparator implements LeafFieldComparator {
 
     private DocIdSetIterator competitiveIterator;
     private long iteratorCost;
-    private int maxDocVisited = 0;
+    private int maxDocVisited = -1;
     private int updateCounter = 0;
 
     public NumericLeafComparator(LeafReaderContext context) throws IOException {
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
index ec6ec66d322c..8dfe3ac3d1ac 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
@@ -20,6 +20,10 @@
 import static org.apache.lucene.search.SortField.FIELD_SCORE;
 
 import java.io.IOException;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.LongStream;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FloatDocValuesField;
@@ -633,4 +637,84 @@ public void testPointValidation() throws IOException {
     reader.close();
     dir.close();
   }
+
+  public void testMaxDocVisited() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    int numDocs = atLeast(10000);
+    long offset = 100 + random().nextInt(100);
+    long smallestValue = 50 + random().nextInt(50);
+    boolean flushed = false;
+    for (int i = 0; i < numDocs; ++i) {
+      Document doc = new Document();
+      doc.add(new NumericDocValuesField("my_field", i + offset));
+      doc.add(new LongPoint("my_field", i + offset));
+      writer.addDocument(doc);
+      if (i >= 5000 && flushed == false) {
+        flushed = true;
+        writer.flush();
+        // Index the smallest value to the first slot of the second segment
+        doc = new Document();
+        doc.add(new NumericDocValuesField("my_field", smallestValue));
+        doc.add(new LongPoint("my_field", smallestValue));
+        writer.addDocument(doc);
+      }
+    }
+    IndexReader reader = DirectoryReader.open(writer);
+    writer.close();
+    IndexSearcher searcher = new IndexSearcher(reader);
+    SortField sortField = new SortField("my_field", SortField.Type.LONG);
+    TopFieldDocs topDocs =
+        searcher.search(new MatchAllDocsQuery(), 1 + random().nextInt(100), new Sort(sortField));
+    FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[0];
+    assertEquals(smallestValue, ((Long) fieldDoc.fields[0]).intValue());
+    reader.close();
+    dir.close();
+  }
+
+  public void testRandomLong() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
+    List<Long> seqNos = LongStream.range(0, atLeast(10_000)).boxed().collect(Collectors.toList());
+    Collections.shuffle(seqNos);
+    int pendingDocs = 0;
+    for (long seqNo : seqNos) {
+      Document doc = new Document();
+      doc.add(new NumericDocValuesField("seq_no", seqNo));
+      doc.add(new LongPoint("seq_no", seqNo));
+      writer.addDocument(doc);
+      pendingDocs++;
+      if (pendingDocs > 500 && random().nextInt(100) <= 5) {
+        pendingDocs = 0;
+        writer.flush();
+      }
+    }
+    writer.flush();
+    seqNos.sort(Long::compare);
+    IndexReader reader = DirectoryReader.open(writer);
+    writer.close();
+    IndexSearcher searcher = new IndexSearcher(reader);
+    SortField sortField = new SortField("seq_no", SortField.Type.LONG);
+    int visitedHits = 0;
+    ScoreDoc after = null;
+    while (visitedHits < seqNos.size()) {
+      int batch = 1 + random().nextInt(100);
+      Query query =
+          random().nextBoolean()
+              ? new MatchAllDocsQuery()
+              : LongPoint.newRangeQuery("seq_no", 0, Long.MAX_VALUE);
+      TopDocs topDocs = searcher.searchAfter(after, query, batch, new Sort(sortField));
+      int expectedHits = Math.min(seqNos.size() - visitedHits, batch);
+      assertEquals(expectedHits, topDocs.scoreDocs.length);
+      after = topDocs.scoreDocs[expectedHits - 1];
+      for (int i = 0; i < topDocs.scoreDocs.length; i++) {
+        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
+        long expectedSeqNo = seqNos.get(visitedHits);
+        assertEquals(expectedSeqNo, ((Long) fieldDoc.fields[0]).intValue());
+        visitedHits++;
+      }
+    }
+    reader.close();
+    dir.close();
+  }
 }

From 38347789114364819d1db9da9d5dd71c28908c2b Mon Sep 17 00:00:00 2001
From: Nhat Nguyen
Date: Tue, 14 Sep 2021 22:14:58 -0400
Subject: [PATCH 2/2] stylecheck

---
 .../src/test/org/apache/lucene/search/TestSortOptimization.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
index 8dfe3ac3d1ac..4581cf154542 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
@@ -676,7 +676,7 @@ public void testRandomLong() throws IOException {
     Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
     List<Long> seqNos = LongStream.range(0, atLeast(10_000)).boxed().collect(Collectors.toList());
-    Collections.shuffle(seqNos);
+    Collections.shuffle(seqNos, random());
     int pendingDocs = 0;
     for (long seqNo : seqNos) {
       Document doc = new Document();