Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LUCENE-10106: Sort optimization wrongly skip first docs #300

Merged
merged 2 commits into from
Sep 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public abstract class NumericLeafComparator implements LeafFieldComparator {

// Per-leaf state fields of NumericLeafComparator, as shown in this diff hunk.
private DocIdSetIterator competitiveIterator;
private long iteratorCost;
// NOTE(review): the two maxDocVisited declarations below appear to be the removed ("= 0") and
// the added ("= -1") side of the diff, whose -/+ markers were lost in extraction; only one of
// them can exist in the real class. Presumably -1 is the post-change initial value, so that
// doc 0 is not treated as already visited before any doc was seen -- confirm against the PR.
private int maxDocVisited = 0;
private int maxDocVisited = -1;
private int updateCounter = 0;

public NumericLeafComparator(LeafReaderContext context) throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@
import static org.apache.lucene.search.SortField.FIELD_SCORE;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.LongStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FloatDocValuesField;
Expand Down Expand Up @@ -633,4 +637,84 @@ public void testPointValidation() throws IOException {
reader.close();
dir.close();
}

/**
 * Checks that an ascending sort on {@code my_field} returns the smallest indexed value as the top
 * hit when that value is held by the first document added right after an explicit flush.
 *
 * <p>Regression test for LUCENE-10106 ("Sort optimization wrongly skip first docs").
 *
 * @throws IOException if indexing or searching fails
 */
public void testMaxDocVisited() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
  int numDocs = atLeast(10000);
  // Regular documents get values >= offset (100..199 + i); the special document gets a value in
  // 50..99, so it is strictly smaller than every other value in the index.
  long offset = 100 + random().nextInt(100);
  long smallestValue = 50 + random().nextInt(50);
  boolean flushed = false;
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    doc.add(new NumericDocValuesField("my_field", i + offset));
    doc.add(new LongPoint("my_field", i + offset));
    writer.addDocument(doc);
    // Flush exactly once, the first time i reaches 5000.
    if (i >= 5000 && flushed == false) {
      flushed = true;
      writer.flush();
      // Index the smallest value to the first slot of the second segment
      doc = new Document();
      doc.add(new NumericDocValuesField("my_field", smallestValue));
      doc.add(new LongPoint("my_field", smallestValue));
      writer.addDocument(doc);
    }
  }
  IndexReader reader = DirectoryReader.open(writer);
  writer.close();
  IndexSearcher searcher = new IndexSearcher(reader);
  SortField sortField = new SortField("my_field", SortField.Type.LONG);
  TopFieldDocs topDocs =
      searcher.search(new MatchAllDocsQuery(), 1 + random().nextInt(100), new Sort(sortField));
  FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[0];
  // Compare as long: narrowing the sort value with intValue() would compare a truncated value.
  assertEquals(smallestValue, ((Long) fieldDoc.fields[0]).longValue());
  reader.close();
  dir.close();
}

public void testRandomLong() throws IOException {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

great test!

Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig());
List<Long> seqNos = LongStream.range(0, atLeast(10_000)).boxed().collect(Collectors.toList());
Collections.shuffle(seqNos, random());
int pendingDocs = 0;
for (long seqNo : seqNos) {
Document doc = new Document();
doc.add(new NumericDocValuesField("seq_no", seqNo));
doc.add(new LongPoint("seq_no", seqNo));
writer.addDocument(doc);
pendingDocs++;
if (pendingDocs > 500 && random().nextInt(100) <= 5) {
pendingDocs = 0;
writer.flush();
}
}
writer.flush();
seqNos.sort(Long::compare);
IndexReader reader = DirectoryReader.open(writer);
writer.close();
IndexSearcher searcher = new IndexSearcher(reader);
SortField sortField = new SortField("seq_no", SortField.Type.LONG);
int visitedHits = 0;
ScoreDoc after = null;
while (visitedHits < seqNos.size()) {
int batch = 1 + random().nextInt(100);
Query query =
random().nextBoolean()
? new MatchAllDocsQuery()
: LongPoint.newRangeQuery("seq_no", 0, Long.MAX_VALUE);
TopDocs topDocs = searcher.searchAfter(after, query, batch, new Sort(sortField));
int expectedHits = Math.min(seqNos.size() - visitedHits, batch);
assertEquals(expectedHits, topDocs.scoreDocs.length);
after = topDocs.scoreDocs[expectedHits - 1];
for (int i = 0; i < topDocs.scoreDocs.length; i++) {
FieldDoc fieldDoc = (FieldDoc) topDocs.scoreDocs[i];
long expectedSeqNo = seqNos.get(visitedHits);
assertEquals(expectedSeqNo, ((Long) fieldDoc.fields[0]).intValue());
visitedHits++;
}
}
reader.close();
dir.close();
}
}