From da033511e8c8d5efa60d3fba1c229d2bbd0cae1a Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Wed, 23 Sep 2020 10:22:03 -0400 Subject: [PATCH] Fix bug in sort optimization (#1903) (#1915) Fix bug how iterator with skipping functionality advances and produces docs Relates to #1725 Backport for #1903 --- .../java/org/apache/lucene/search/Weight.java | 6 +- .../search/comparators/DocComparator.java | 10 +-- .../search/comparators/NumericComparator.java | 8 +- .../TestFieldSortOptimizationSkipping.java | 87 ++++++++++++++----- 4 files changed, 76 insertions(+), 35 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/Weight.java b/lucene/core/src/java/org/apache/lucene/search/Weight.java index b63af31cdb37..3f8f05d70898 100644 --- a/lucene/core/src/java/org/apache/lucene/search/Weight.java +++ b/lucene/core/src/java/org/apache/lucene/search/Weight.java @@ -220,13 +220,13 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr // if possible filter scorerIterator to keep only competitive docs as defined by collector DocIdSetIterator filteredIterator = collectorIterator == null ? scorerIterator : ConjunctionDISI.intersectIterators(Arrays.asList(scorerIterator, collectorIterator)); - if (scorer.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) { + if (filteredIterator.docID() == -1 && min == 0 && max == DocIdSetIterator.NO_MORE_DOCS) { scoreAll(collector, filteredIterator, twoPhase, acceptDocs); return DocIdSetIterator.NO_MORE_DOCS; } else { - int doc = scorer.docID(); + int doc = filteredIterator.docID(); if (doc < min) { - doc = scorerIterator.advance(min); + doc = filteredIterator.advance(min); } return scoreRange(collector, filteredIterator, twoPhase, acceptDocs, doc, max); } diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/DocComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/DocComparator.java index 0933bd38f2d2..8974ca69c401 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/DocComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/DocComparator.java @@ -133,16 +133,14 @@ public DocIdSetIterator competitiveIterator() { return null; } else { return new DocIdSetIterator() { - private int doc; - @Override public int nextDoc() throws IOException { - return doc = competitiveIterator.nextDoc(); + return competitiveIterator.nextDoc(); } @Override public int docID() { - return doc; + return competitiveIterator.docID(); } @Override @@ -152,7 +150,7 @@ public long cost() { @Override public int advance(int target) throws IOException { - return doc = competitiveIterator.advance(target); + return competitiveIterator.advance(target); } }; } @@ -176,7 +174,7 @@ private void updateIterator() { if (docBase + maxDoc <= minDoc) { competitiveIterator = DocIdSetIterator.empty(); // skip this segment } else { - int segmentMinDoc = Math.max(0, minDoc - docBase); + int segmentMinDoc = Math.max(competitiveIterator.docID(), minDoc - docBase); competitiveIterator = new MinDocIterator(segmentMinDoc, maxDoc); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java index 00c6aeb5d13e..7f62f668914f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java +++ b/lucene/core/src/java/org/apache/lucene/search/comparators/NumericComparator.java @@ -228,16 +228,14 @@ public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue public DocIdSetIterator competitiveIterator() { if (enableSkipping == false) return null; return new DocIdSetIterator() { - private int doc; - @Override public int nextDoc() throws IOException { - return doc = competitiveIterator.nextDoc(); + return competitiveIterator.nextDoc(); } @Override public int docID() { - return doc; + return competitiveIterator.docID(); } @Override @@ -247,7 +245,7 @@ public long cost() { @Override public int advance(int target) throws IOException { - return doc = competitiveIterator.advance(target); + return competitiveIterator.advance(target); } }; } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestFieldSortOptimizationSkipping.java b/lucene/core/src/test/org/apache/lucene/search/TestFieldSortOptimizationSkipping.java index 64ee8fb5c474..96e6ad941ebc 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestFieldSortOptimizationSkipping.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestFieldSortOptimizationSkipping.java @@ -53,6 +53,7 @@ public void testLongSortOptimization() throws IOException { if (i == 7000) writer.flush(); // two segments } final IndexReader reader = DirectoryReader.open(writer); + writer.close(); IndexSearcher searcher = new IndexSearcher(reader); final SortField sortField = new SortField("my_field", SortField.Type.LONG); sortField.setCanUsePoints(); @@ -121,7 +122,6 @@ public void testLongSortOptimization() throws IOException { assertEquals(topDocs.totalHits.value, numDocs); // assert that all documents were collected => optimization was not run } - writer.close(); reader.close(); dir.close(); } @@ -142,6 +142,7 @@ public void testLongSortOptimizationOnFieldNotIndexedWithPoints() throws IOExcep writer.addDocument(doc); } final IndexReader reader = DirectoryReader.open(writer); + writer.close(); IndexSearcher searcher = new IndexSearcher(reader); final SortField sortField = new SortField("my_field", SortField.Type.LONG); sortField.setCanUsePoints(); @@ -159,7 +160,6 @@ public void testLongSortOptimizationOnFieldNotIndexedWithPoints() throws IOExcep } assertEquals(topDocs.totalHits.value, numDocs); // assert that all documents were collected => optimization was not run - writer.close(); reader.close(); dir.close(); } @@ -179,6 +179,7 @@ public void testSortOptimizationWithMissingValues() throws IOException { if (i == 7000) writer.flush(); // two segments } final IndexReader reader = DirectoryReader.open(writer); + writer.close(); IndexSearcher searcher = new IndexSearcher(reader); final int numHits = 3; final int totalHitsThreshold = 3; @@ -206,7 +207,6 @@ public void testSortOptimizationWithMissingValues() throws IOException { assertTrue(topDocs.totalHits.value < numDocs); // assert that some docs were skipped => optimization was run } - writer.close(); reader.close(); dir.close(); } @@ -224,6 +224,7 @@ public void testSortOptimizationEqualValues() throws IOException { if (i == 7000) writer.flush(); // two segments } final IndexReader reader = DirectoryReader.open(writer); + writer.close(); IndexSearcher searcher = new IndexSearcher(reader); final int numHits = 3; final int totalHitsThreshold = 3; @@ -278,7 +279,6 @@ public void testSortOptimizationEqualValues() throws IOException { assertEquals(topDocs.totalHits.value, numDocs); // assert that all documents were collected => optimization was not run } - writer.close(); reader.close(); dir.close(); } @@ -296,6 +296,7 @@ public void testFloatSortOptimization() throws IOException { writer.addDocument(doc); } final IndexReader reader = DirectoryReader.open(writer); + writer.close(); IndexSearcher searcher = new IndexSearcher(reader); final SortField sortField = new SortField("my_field", SortField.Type.FLOAT); sortField.setCanUsePoints(); @@ -316,7 +317,6 @@ public void testFloatSortOptimization() throws IOException { assertTrue(topDocs.totalHits.value < numDocs); } - writer.close(); reader.close(); dir.close(); } @@ -329,14 +329,15 @@ public void testDocSortOptimizationWithAfter() throws IOException { final Document doc = new Document(); writer.addDocument(doc); if ((i > 0) && (i % 50 == 0)) { - writer.commit(); + writer.flush(); } } final IndexReader reader = DirectoryReader.open(writer); - IndexSearcher searcher = new IndexSearcher(reader); - final int numHits = 3; - final int totalHitsThreshold = 3; - final int[] searchAfters = {10, 140, numDocs - 4}; + writer.close(); + IndexSearcher searcher = newSearcher(reader); + final int numHits = 10; + final int totalHitsThreshold = 10; + final int[] searchAfters = {3, 10, numDocs - 10}; for (int searchAfter : searchAfters) { // sort by _doc with search after should trigger optimization { @@ -345,14 +346,15 @@ public void testDocSortOptimizationWithAfter() throws IOException { final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold); searcher.search(new MatchAllDocsQuery(), collector); TopDocs topDocs = collector.topDocs(); - assertEquals(numHits, topDocs.scoreDocs.length); - for (int i = 0; i < numHits; i++) { + int expNumHits = (searchAfter >= (numDocs - numHits)) ? (numDocs - searchAfter - 1) : numHits; + assertEquals(expNumHits, topDocs.scoreDocs.length); + for (int i = 0; i < topDocs.scoreDocs.length; i++) { int expectedDocID = searchAfter + 1 + i; assertEquals(expectedDocID, topDocs.scoreDocs[i].doc); } assertTrue(collector.isEarlyTerminated()); // check that very few docs were collected - assertTrue(topDocs.totalHits.value < 10); + assertTrue(topDocs.totalHits.value < numDocs); } // sort by _doc + _score with search after should trigger optimization @@ -362,14 +364,15 @@ public void testDocSortOptimizationWithAfter() throws IOException { final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold); searcher.search(new MatchAllDocsQuery(), collector); TopDocs topDocs = collector.topDocs(); - assertEquals(numHits, topDocs.scoreDocs.length); - for (int i = 0; i < numHits; i++) { + int expNumHits = (searchAfter >= (numDocs - numHits)) ? (numDocs - searchAfter - 1) : numHits; + assertEquals(expNumHits, topDocs.scoreDocs.length); + for (int i = 0; i < topDocs.scoreDocs.length; i++) { int expectedDocID = searchAfter + 1 + i; assertEquals(expectedDocID, topDocs.scoreDocs[i].doc); } assertTrue(collector.isEarlyTerminated()); // assert that very few docs were collected - assertTrue(topDocs.totalHits.value < 10); + assertTrue(topDocs.totalHits.value < numDocs); } // sort by _doc desc should not trigger optimization @@ -379,8 +382,9 @@ public void testDocSortOptimizationWithAfter() throws IOException { final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, after, totalHitsThreshold); searcher.search(new MatchAllDocsQuery(), collector); TopDocs topDocs = collector.topDocs(); - assertEquals(numHits, topDocs.scoreDocs.length); - for (int i = 0; i < numHits; i++) { + int expNumHits = (searchAfter < numHits) ? searchAfter : numHits; + assertEquals(expNumHits, topDocs.scoreDocs.length); + for (int i = 0; i < topDocs.scoreDocs.length; i++) { int expectedDocID = searchAfter - 1 - i; assertEquals(expectedDocID, topDocs.scoreDocs[i].doc); } @@ -389,7 +393,6 @@ public void testDocSortOptimizationWithAfter() throws IOException { } } - writer.close(); reader.close(); dir.close(); } @@ -407,12 +410,13 @@ public void testDocSortOptimization() throws IOException { doc.add(new StringField("tf", "seg" + seg, Field.Store.YES)); writer.addDocument(doc); if ((i > 0) && (i % 50 == 0)) { - writer.commit(); + writer.flush(); seg++; } } final IndexReader reader = DirectoryReader.open(writer); - IndexSearcher searcher = new IndexSearcher(reader); + writer.close(); + IndexSearcher searcher = newSearcher(reader);; final int numHits = 3; final int totalHitsThreshold = 3; final Sort sort = new Sort(FIELD_DOC); @@ -450,7 +454,48 @@ public void testDocSortOptimization() throws IOException { assertTrue(topDocs.totalHits.value < 10); // assert that very few docs were collected } + reader.close(); + dir.close(); + } + + /** + * Test that sorting on _doc works correctly. + * This test goes through DefaultBulkSorter::scoreRange, where scorerIterator is BitSetIterator. + * As a conjunction of this BitSetIterator with DocComparator's iterator, we get BitSetConjunctionDISI. + * BitSetConjunctionDISI advances based on the DocComparator's iterator, and doesn't consider + * that its BitSetIterator may have advanced passed a certain doc. + */ + public void testDocSort() throws IOException { + final Directory dir = newDirectory(); + final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig()); + final int numDocs = 4; + for (int i = 0; i < numDocs; ++i) { + final Document doc = new Document(); + doc.add(new StringField("id", "id" + i, Field.Store.NO)); + if (i < 2) { + doc.add(new LongPoint("lf", 1)); + } + writer.addDocument(doc); + } + final IndexReader reader = DirectoryReader.open(writer); writer.close(); + + IndexSearcher searcher = newSearcher(reader); + searcher.setQueryCache(null); + final int numHits = 10; + final int totalHitsThreshold = 10; + final Sort sort = new Sort(FIELD_DOC); + + { + final TopFieldCollector collector = TopFieldCollector.create(sort, numHits, null, totalHitsThreshold); + BooleanQuery.Builder bq = new BooleanQuery.Builder(); + bq.add(LongPoint.newExactQuery("lf", 1), BooleanClause.Occur.MUST); + bq.add(new TermQuery(new Term("id", "id3")), BooleanClause.Occur.MUST_NOT); + searcher.search(bq.build(), collector); + TopDocs topDocs = collector.topDocs(); + assertEquals(2, topDocs.scoreDocs.length); + } + reader.close(); dir.close(); }