LUCENE-10106: Sort optimization wrongly skips first docs (#300)
The first documents of subsequent segments are mistakenly skipped when 
sort optimization is enabled. We should initialize maxDocVisited in
NumericComparator to -1 instead of 0.
dnhatn authored Sep 15, 2021
1 parent 1586933 commit b7a286d
Showing 2 changed files with 85 additions and 1 deletion.
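
For context, here is a minimal, self-contained model of the off-by-one described above. It is not the actual Lucene code; it only assumes, as the commit message implies, that the competitive iterator ignores documents whose id is at or below maxDocVisited. Seeding that field with 0 therefore treats doc 0 of every new segment as already visited, while -1 leaves it eligible.

// Illustrative model only (hypothetical class, not part of NumericComparator).
public class MaxDocVisitedDemo {
  private final int maxDocVisited;

  MaxDocVisitedDemo(int maxDocVisited) {
    this.maxDocVisited = maxDocVisited;
  }

  // Mirrors the assumed early return that skips docs at or below maxDocVisited.
  boolean accepts(int docID) {
    return docID > maxDocVisited;
  }

  public static void main(String[] args) {
    System.out.println(new MaxDocVisitedDemo(0).accepts(0)); // false: doc 0 wrongly skipped
    System.out.println(new MaxDocVisitedDemo(-1).accepts(0)); // true: doc 0 still considered
  }
}
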
@@ -84,7 +84,7 @@ public abstract class NumericLeafComparator implements LeafFieldComparator {

    private DocIdSetIterator competitiveIterator;
    private long iteratorCost;
-   private int maxDocVisited = 0;
+   private int maxDocVisited = -1;
    private int updateCounter = 0;

    public NumericLeafComparator(LeafReaderContext context) throws IOException {
@@ -20,6 +20,10 @@
import static org.apache.lucene.search.SortField.FIELD_SCORE;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.LongStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
@@ -633,4 +637,84 @@ public void testPointValidation() throws IOException {
    reader.close();
    dir.close();
  }

  public void testMaxDocVisited() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
    int numDocs = atLeast(10000);
    long offset = 100 + random().nextInt(100);
    long smallestValue = 50 + random().nextInt(50);
    boolean flushed = false;
    for (int i = 0; i < numDocs; ++i) {
      Document doc = new Document();
      doc.add(new NumericDocValuesField("my_field", i + offset));
      doc.add(new LongPoint("my_field", i + offset));
      writer.addDocument(doc);
      if (i >= 5000 && flushed == false) {
        flushed = true;
        writer.flush();
        // Index the smallest value to the first slot of the second segment
        doc = new Document();
        doc.add(new NumericDocValuesField("my_field", smallestValue));
        doc.add(new LongPoint("my_field", smallestValue));
        writer.addDocument(doc);
      }
    }
    IndexReader reader = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher searcher = new IndexSearcher(reader);
    SortField sortField = new SortField("my_field", SortField.Type.LONG);
    TopFieldDocs topDocs =
        searcher.search(new MatchAllDocsQuery(), 1 + random().nextInt(100), new Sort(sortField));
    FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[0];
    assertEquals(smallestValue, ((Long) fieldDoc.fields[0]).intValue());
    reader.close();
    dir.close();
  }

  public void testRandomLong() throws IOException {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
    List<Long> seqNos = LongStream.range(0, atLeast(10_000)).boxed().collect(Collectors.toList());
    Collections.shuffle(seqNos, random());
    int pendingDocs = 0;
    for (long seqNo : seqNos) {
      Document doc = new Document();
      doc.add(new NumericDocValuesField("seq_no", seqNo));
      doc.add(new LongPoint("seq_no", seqNo));
      writer.addDocument(doc);
      pendingDocs++;
      if (pendingDocs > 500 && random().nextInt(100) <= 5) {
        pendingDocs = 0;
        writer.flush();
      }
    }
    writer.flush();
    seqNos.sort(Long::compare);
    IndexReader reader = DirectoryReader.open(writer);
    writer.close();
    IndexSearcher searcher = new IndexSearcher(reader);
    SortField sortField = new SortField("seq_no", SortField.Type.LONG);
    int visitedHits = 0;
    ScoreDoc after = null;
    while (visitedHits < seqNos.size()) {
      int batch = 1 + random().nextInt(100);
      Query query =
          random().nextBoolean()
              ? new MatchAllDocsQuery()
              : LongPoint.newRangeQuery("seq_no", 0, Long.MAX_VALUE);
      TopDocs topDocs = searcher.searchAfter(after, query, batch, new Sort(sortField));
      int expectedHits = Math.min(seqNos.size() - visitedHits, batch);
      assertEquals(expectedHits, topDocs.scoreDocs.length);
      after = topDocs.scoreDocs[expectedHits - 1];
      for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
        long expectedSeqNo = seqNos.get(visitedHits);
        assertEquals(expectedSeqNo, ((Long) fieldDoc.fields[0]).intValue());
        visitedHits++;
      }
    }
    reader.close();
    dir.close();
  }
}
