From c315fa0304847c59fdb07257fb05b78a940481e7 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 27 Nov 2024 18:47:17 +0100 Subject: [PATCH 1/6] Make inlining decisions a bit more predictable in our main queries. This implements a small contained hack to make sure that our compound scorers like `MaxScoreBulkScorer`, `ConjunctionBulkScorer`, `BlockMaxConjunctionBulkScorer`, `WANDScorer` and `ConjunctionDISI` only have two concrete implementations of `DocIdSetIterator` and `Scorable` to deal with. This helps because it makes calls to `DocIdSetIterator#nextDoc()`, `DocIdSetIterator#advance(int)` and `Scorable#score()` bimorphic at most, and bimorphic calls are candidate for inlining. This should help speed up boolean queries of term queries at the expense of boolean queries of other query types. This feels fair to me as it gives more speedups than slowdowns in benchmarks, and that boolean queries of term queries are extremely typical. Boolean queries that mix term queries and other types of queries may get a slowdown or a speedup depending on whether they get more from the speedup on their term clauses than they lose on their other clauses. --- .../search/BlockMaxConjunctionBulkScorer.java | 10 +-- .../apache/lucene/search/BooleanScorer.java | 6 +- .../lucene/search/ConjunctionBulkScorer.java | 11 +-- .../apache/lucene/search/ConjunctionDISI.java | 6 +- .../org/apache/lucene/search/DisiWrapper.java | 16 +++- .../lucene/search/DisjunctionMaxScorer.java | 2 +- .../lucene/search/DisjunctionScorer.java | 2 +- .../lucene/search/DisjunctionSumScorer.java | 2 +- .../lucene/search/FilterDocIdSetIterator.java | 50 ++++++++++++ .../lucene/search/IndriDisjunctionScorer.java | 2 +- .../lucene/search/MaxScoreBulkScorer.java | 14 ++-- ...iTermQueryConstantScoreBlendedWrapper.java | 4 +- .../org/apache/lucene/search/ScorerUtil.java | 79 +++++++++++++++++++ .../apache/lucene/search/SynonymQuery.java | 2 +- .../org/apache/lucene/search/WANDScorer.java | 12 +-- .../lucene/search/TestDisiPriorityQueue.java | 2 +- .../sandbox/search/CombinedFieldQuery.java | 2 +- .../lucene/sandbox/search/CoveringScorer.java | 4 +- 18 files changed, 182 insertions(+), 44 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/search/FilterDocIdSetIterator.java diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java index 9f4531540a50..3022608233e7 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java @@ -38,7 +38,7 @@ final class BlockMaxConjunctionBulkScorer extends BulkScorer { private final Scorer[] scorers; private final DocIdSetIterator[] iterators; private final DocIdSetIterator lead1, lead2; - private final Scorer scorer1, scorer2; + private final Scorable scorer1, scorer2; private final DocAndScore scorable = new DocAndScore(); private final double[] sumOfOtherClauses; private final int maxDoc; @@ -51,10 +51,10 @@ final class BlockMaxConjunctionBulkScorer extends BulkScorer { Arrays.sort(this.scorers, Comparator.comparingLong(scorer -> scorer.iterator().cost())); this.iterators = Arrays.stream(this.scorers).map(Scorer::iterator).toArray(DocIdSetIterator[]::new); - lead1 = iterators[0]; - lead2 = iterators[1]; - scorer1 = this.scorers[0]; - scorer2 = this.scorers[1]; + lead1 = ScorerUtil.likelyImpactsEnum(iterators[0]); + lead2 = ScorerUtil.likelyImpactsEnum(iterators[1]); + scorer1 = ScorerUtil.likelyTermScorer(this.scorers[0]); + scorer2 = ScorerUtil.likelyTermScorer(this.scorers[1]); this.sumOfOtherClauses = new double[this.scorers.length]; for (int i = 0; i < sumOfOtherClauses.length; i++) { sumOfOtherClauses[i] = Double.POSITIVE_INFINITY; diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java index 7be1558dd71c..9da813dccb85 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java @@ -155,7 +155,7 @@ public int count() throws IOException { this.needsScores = needsScores; LongArrayList costs = new LongArrayList(scorers.size()); for (Scorer scorer : scorers) { - DisiWrapper w = new DisiWrapper(scorer); + DisiWrapper w = new DisiWrapper(scorer, false); costs.add(w.cost); final DisiWrapper evicted = tail.insertWithOverflow(w); if (evicted != null) { @@ -177,7 +177,7 @@ private void scoreDisiWrapperIntoBitSet(DisiWrapper w, Bits acceptDocs, int min, Bucket[] buckets = BooleanScorer.this.buckets; DocIdSetIterator it = w.iterator; - Scorer scorer = w.scorer; + Scorable scorer = w.scorer; int doc = w.doc; if (doc < min) { doc = it.advance(min); @@ -300,7 +300,7 @@ private void scoreWindowSingleScorer( if (doc < windowMin) { doc = it.advance(windowMin); } - collector.setScorer(w.scorer); + collector.setScorer(w.scorable); for (; doc < end; doc = it.nextDoc()) { if (acceptDocs == null || acceptDocs.get(doc)) { collector.collect(doc); diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionBulkScorer.java index 04ad472577dc..d06804bf2d37 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionBulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionBulkScorer.java @@ -30,7 +30,7 @@ */ final class ConjunctionBulkScorer extends BulkScorer { - private final Scorer[] scoringScorers; + private final Scorable[] scoringScorers; private final DocIdSetIterator lead1, lead2; private final List others; private final Scorable scorable; @@ -45,21 +45,22 @@ final class ConjunctionBulkScorer extends BulkScorer { allScorers.addAll(requiredScoring); allScorers.addAll(requiredNoScoring); - this.scoringScorers = requiredScoring.toArray(Scorer[]::new); + this.scoringScorers = + requiredScoring.stream().map(ScorerUtil::likelyTermScorer).toArray(Scorable[]::new); List iterators = new ArrayList<>(); for (Scorer scorer : allScorers) { iterators.add(scorer.iterator()); } Collections.sort(iterators, Comparator.comparingLong(DocIdSetIterator::cost)); - lead1 = iterators.get(0); - lead2 = iterators.get(1); + lead1 = ScorerUtil.likelyPostingsEnum(iterators.get(0)); + lead2 = ScorerUtil.likelyPostingsEnum(iterators.get(1)); others = List.copyOf(iterators.subList(2, iterators.size())); scorable = new Scorable() { @Override public float score() throws IOException { double score = 0; - for (Scorer scorer : scoringScorers) { + for (Scorable scorer : scoringScorers) { score += scorer.score(); } return (float) score; diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java index 4fc74b4f0871..dd6fcf59b66a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java @@ -157,9 +157,9 @@ private ConjunctionDISI(List iterators) { // Sort the array the first time to allow the least frequent DocsEnum to // lead the matching. CollectionUtil.timSort(iterators, (o1, o2) -> Long.compare(o1.cost(), o2.cost())); - lead1 = iterators.get(0); - lead2 = iterators.get(1); - others = iterators.subList(2, iterators.size()).toArray(new DocIdSetIterator[0]); + lead1 = ScorerUtil.likelyPostingsEnum(iterators.get(0)); + lead2 = ScorerUtil.likelyPostingsEnum(iterators.get(1)); + others = iterators.subList(2, iterators.size()).toArray(DocIdSetIterator[]::new); } private int doNext(int doc) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index 350bffc940e4..7b72a92a861f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -16,6 +16,8 @@ */ package org.apache.lucene.search; +import java.util.Objects; + /** * Wrapper used in {@link DisiPriorityQueue}. * @@ -24,6 +26,7 @@ public class DisiWrapper { public final DocIdSetIterator iterator; public final Scorer scorer; + public final Scorable scorable; public final long cost; public final float matchCost; // the match cost for two-phase iterators, 0 otherwise public int doc; // the current doc, used for comparison @@ -42,15 +45,20 @@ public class DisiWrapper { // for MaxScoreBulkScorer float maxWindowScore; - public DisiWrapper(Scorer scorer) { - this.scorer = scorer; - this.iterator = scorer.iterator(); + public DisiWrapper(Scorer scorer, boolean impacts) { + this.scorer = Objects.requireNonNull(scorer); + this.scorable = ScorerUtil.likelyTermScorer(scorer); + if (impacts) { + this.iterator = ScorerUtil.likelyImpactsEnum(scorer.iterator()); + } else { + this.iterator = ScorerUtil.likelyPostingsEnum(scorer.iterator()); + } this.cost = iterator.cost(); this.doc = -1; this.twoPhaseView = scorer.twoPhaseIterator(); if (twoPhaseView != null) { - approximation = twoPhaseView.approximation(); + approximation = ScorerUtil.likelyPostingsEnum(twoPhaseView.approximation()); matchCost = twoPhaseView.matchCost(); } else { approximation = iterator; diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java index c12a131fc5ef..58965a5e58f6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java @@ -60,7 +60,7 @@ protected float score(DisiWrapper topList) throws IOException { float scoreMax = 0; double otherScoreSum = 0; for (DisiWrapper w = topList; w != null; w = w.next) { - float subScore = w.scorer.score(); + float subScore = w.scorable.score(); if (subScore >= scoreMax) { otherScoreSum += scoreMax; scoreMax = subScore; diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java index 8b4da6ffed76..54681f4a8ace 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionScorer.java @@ -37,7 +37,7 @@ protected DisjunctionScorer(List subScorers, ScoreMode scoreMode) throws } this.subScorers = new DisiPriorityQueue(subScorers.size()); for (Scorer scorer : subScorers) { - final DisiWrapper w = new DisiWrapper(scorer); + final DisiWrapper w = new DisiWrapper(scorer, false); this.subScorers.add(w); } this.needsScores = scoreMode != ScoreMode.COMPLETE_NO_SCORES; diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java index 3b5b98378c80..fb9648e725fc 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionSumScorer.java @@ -40,7 +40,7 @@ protected float score(DisiWrapper topList) throws IOException { double score = 0; for (DisiWrapper w = topList; w != null; w = w.next) { - score += w.scorer.score(); + score += w.scorable.score(); } return (float) score; } diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterDocIdSetIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterDocIdSetIterator.java new file mode 100644 index 000000000000..4c40180d2dac --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/search/FilterDocIdSetIterator.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.search; + +import java.io.IOException; + +/** Wrapper around a {@link DocIdSetIterator}. */ +public class FilterDocIdSetIterator extends DocIdSetIterator { + + protected final DocIdSetIterator in; + + /** Sole constructor. */ + public FilterDocIdSetIterator(DocIdSetIterator in) { + this.in = in; + } + + @Override + public int docID() { + return in.docID(); + } + + @Override + public int nextDoc() throws IOException { + return in.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return in.advance(target); + } + + @Override + public long cost() { + return in.cost(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/search/IndriDisjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/IndriDisjunctionScorer.java index e501013f4abb..6836269189db 100644 --- a/lucene/core/src/java/org/apache/lucene/search/IndriDisjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/IndriDisjunctionScorer.java @@ -35,7 +35,7 @@ protected IndriDisjunctionScorer(List subScorersList, ScoreMode scoreMod this.subScorersList = subScorersList; this.subScorers = new DisiPriorityQueue(subScorersList.size()); for (Scorer scorer : subScorersList) { - final DisiWrapper w = new DisiWrapper(scorer); + final DisiWrapper w = new DisiWrapper(scorer, false); this.subScorers.add(w); } this.approximation = new DisjunctionDISIApproximation(this.subScorers); diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java index 56857bc67cc1..3229535ae11c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java @@ -57,7 +57,7 @@ final class MaxScoreBulkScorer extends BulkScorer { int i = 0; long cost = 0; for (Scorer scorer : scorers) { - DisiWrapper w = new DisiWrapper(scorer); + DisiWrapper w = new DisiWrapper(scorer, true); cost += w.cost; allScorers[i++] = w; } @@ -168,7 +168,7 @@ private void scoreInnerWindowSingleEssentialClause( if (acceptDocs != null && acceptDocs.get(doc) == false) { continue; } - scoreNonEssentialClauses(collector, doc, top.scorer.score(), firstEssentialScorer); + scoreNonEssentialClauses(collector, doc, top.scorable.score(), firstEssentialScorer); } top.doc = top.iterator.docID(); essentialQueue.updateTop(); @@ -196,7 +196,7 @@ private void scoreInnerWindowAsConjunction(LeafCollector collector, Bits acceptD continue; } - double score = lead1.scorer.score(); + double score = lead1.scorable.score(); // We specialize handling the second best scorer, which seems to help a bit with performance. // But this is the exact same logic as in the below for loop. @@ -215,7 +215,7 @@ private void scoreInnerWindowAsConjunction(LeafCollector collector, Bits acceptD continue; } - score += lead2.scorer.score(); + score += lead2.scorable.score(); for (int i = allScorers.length - 3; i >= firstRequiredScorer; --i) { if ((float) MathUtil.sumUpperBound(score + maxScoreSums[i], allScorers.length) @@ -233,7 +233,7 @@ private void scoreInnerWindowAsConjunction(LeafCollector collector, Bits acceptD lead1.doc = lead1.iterator.advance(Math.min(w.doc, max)); continue outer; } - score += w.scorer.score(); + score += w.scorable.score(); } scoreNonEssentialClauses(collector, lead1.doc, score, firstRequiredScorer); @@ -254,7 +254,7 @@ private void scoreInnerWindowMultipleEssentialClauses( if (acceptDocs == null || acceptDocs.get(doc)) { final int i = doc - innerWindowMin; windowMatches[i >>> 6] |= 1L << i; - windowScores[i] += top.scorer.score(); + windowScores[i] += top.scorable.score(); } } top.doc = top.iterator.docID(); @@ -344,7 +344,7 @@ private void scoreNonEssentialClauses( scorer.doc = scorer.iterator.advance(doc); } if (scorer.doc == doc) { - score += scorer.scorer.score(); + score += scorer.scorable.score(); } } diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreBlendedWrapper.java b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreBlendedWrapper.java index c108f6d9c1c4..78a75fd2b3f6 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreBlendedWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiTermQueryConstantScoreBlendedWrapper.java @@ -113,10 +113,10 @@ protected boolean lessThan(PostingsEnum a, PostingsEnum b) { DisiPriorityQueue subs = new DisiPriorityQueue(highFrequencyTerms.size() + 1); for (DocIdSetIterator disi : highFrequencyTerms) { Scorer s = wrapWithDummyScorer(this, disi); - subs.add(new DisiWrapper(s)); + subs.add(new DisiWrapper(s, false)); } Scorer s = wrapWithDummyScorer(this, otherTerms.build().iterator()); - subs.add(new DisiWrapper(s)); + subs.add(new DisiWrapper(s, false)); return new WeightOrDocIdSetIterator(new DisjunctionDISIApproximation(subs)); } diff --git a/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java b/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java index 50c960719cf9..0d8721ceb237 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java @@ -16,12 +16,51 @@ */ package org.apache.lucene.search; +import java.io.IOException; import java.util.stream.LongStream; import java.util.stream.StreamSupport; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.FeatureField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.ImpactsEnum; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.ByteBuffersDirectory; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; /** Util class for Scorer related methods */ class ScorerUtil { + + private static final Class DEFAULT_POSTINGS_ENUM_CLASS; + private static final Class DEFAULT_IMPACTS_ENUM_CLASS; + + static { + try (Directory dir = new ByteBuffersDirectory(); + IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) { + Document doc = new Document(); + doc.add(new FeatureField("field", "value", 1f)); + w.addDocument(doc); + try (DirectoryReader reader = DirectoryReader.open(w)) { + LeafReader leafReader = reader.leaves().get(0).reader(); + TermsEnum te = leafReader.terms("field").iterator(); + if (te.seekExact(new BytesRef("value")) == false) { + throw new Error(); + } + PostingsEnum pe = te.postings(null, PostingsEnum.NONE); + DEFAULT_POSTINGS_ENUM_CLASS = pe.getClass(); + ImpactsEnum ie = te.impacts(PostingsEnum.FREQS); + DEFAULT_IMPACTS_ENUM_CLASS = ie.getClass(); + } + } catch (IOException e) { + throw new Error(e); + } + } + static long costWithMinShouldMatch(LongStream costs, int numScorers, int minShouldMatch) { // the idea here is the following: a boolean query c1,c2,...cn with minShouldMatch=m // could be rewritten to: @@ -46,4 +85,44 @@ protected boolean lessThan(Long a, Long b) { costs.forEach(pq::insertWithOverflow); return StreamSupport.stream(pq.spliterator(), false).mapToLong(Number::longValue).sum(); } + + /** + * Optimize a {@link DocIdSetIterator} for the case when it is likely implemented via an {@link + * ImpactsEnum}. This return method only has 2 possible return types, which helps make sure that + * calls to {@link DocIdSetIterator#nextDoc()} and {@link DocIdSetIterator#advance(int)} are + * bimorphic at most and candidate for inlining. + */ + static DocIdSetIterator likelyImpactsEnum(DocIdSetIterator it) { + if (it.getClass() != DEFAULT_IMPACTS_ENUM_CLASS + && it.getClass() != FilterDocIdSetIterator.class) { + it = new FilterDocIdSetIterator(it); + } + return it; + } + + /** + * Optimize a {@link DocIdSetIterator} for the case when it is likely implemented via a {@link + * PostingsEnum}. This return method only has 2 possible return types, which helps make sure that + * calls to {@link DocIdSetIterator#nextDoc()} and {@link DocIdSetIterator#advance(int)} are + * bimorphic at most and candidate for inlining. + */ + static DocIdSetIterator likelyPostingsEnum(DocIdSetIterator it) { + if (it.getClass() != DEFAULT_POSTINGS_ENUM_CLASS + && it.getClass() != FilterDocIdSetIterator.class) { + it = new FilterDocIdSetIterator(it); + } + return it; + } + + /** + * Optimize a {@link Scorable} for the case when it is likely implemented via a {@link + * TermScorer}. This return method only has 2 possible return types, which helps make sure that + * calls to {@link Scorable#score()} are bimorphic at most and candidate for inlining. + */ + static Scorable likelyTermScorer(Scorable scorable) { + if (scorable.getClass() != TermScorer.class && scorable.getClass() != FilterScorable.class) { + scorable = new FilterScorable(scorable); + } + return scorable; + } } diff --git a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java index 357f97019f86..ef1073116d7f 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/SynonymQuery.java @@ -646,7 +646,7 @@ private static class DisiWrapperFreq extends DisiWrapper { final float boost; DisiWrapperFreq(Scorer scorer, float boost) { - super(scorer); + super(scorer, false); this.pe = (PostingsEnum) scorer.iterator(); this.boost = boost; } diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java index 59441d21539e..a042e40d6f5e 100644 --- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java @@ -196,7 +196,7 @@ private static long scaleMinScore(float minScore, int scalingFactor) { } for (Scorer scorer : scorers) { - addUnpositionedLead(new DisiWrapper(scorer)); + addUnpositionedLead(new DisiWrapper(scorer, scoreMode == ScoreMode.TOP_SCORES)); } this.cost = @@ -221,7 +221,7 @@ private boolean ensureConsistent() throws IOException { List leadScores = new ArrayList<>(); for (DisiWrapper w = lead; w != null; w = w.next) { assert w.doc == doc; - leadScores.add(w.scorer.score()); + leadScores.add(w.scorable.score()); } // Make sure to recompute the sum in the same order to get the same floating point rounding // errors. @@ -266,7 +266,7 @@ public final Collection getChildren() throws IOException { List matchingChildren = new ArrayList<>(); advanceAllTail(); for (DisiWrapper s = lead; s != null; s = s.next) { - matchingChildren.add(new ChildScorable(s.scorer, "SHOULD")); + matchingChildren.add(new ChildScorable(s.scorable, "SHOULD")); } return matchingChildren; } @@ -370,7 +370,7 @@ private void addLead(DisiWrapper lead) throws IOException { this.lead = lead; freq += 1; if (scoreMode == ScoreMode.TOP_SCORES) { - leadScore += lead.scorer.score(); + leadScore += lead.scorable.score(); } } @@ -522,7 +522,7 @@ private void moveToNextCandidate() throws IOException { lead.next = null; freq = 1; if (scoreMode == ScoreMode.TOP_SCORES) { - leadScore = lead.scorer.score(); + leadScore = lead.scorable.score(); } while (head.size() > 0 && head.top().doc == doc) { addLead(head.pop()); @@ -553,7 +553,7 @@ public float score() throws IOException { if (scoreMode != ScoreMode.TOP_SCORES) { // With TOP_SCORES, the score was already computed on the fly. for (DisiWrapper s = lead; s != null; s = s.next) { - leadScore += s.scorer.score(); + leadScore += s.scorable.score(); } } return (float) leadScore; diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDisiPriorityQueue.java b/lucene/core/src/test/org/apache/lucene/search/TestDisiPriorityQueue.java index 62dc0a94ca3b..fb7afac8ba47 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestDisiPriorityQueue.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestDisiPriorityQueue.java @@ -70,7 +70,7 @@ public void testRandom() throws Exception { private static DisiWrapper wrapper(DocIdSetIterator iterator) throws IOException { Query q = new DummyQuery(iterator); Scorer s = q.createWeight(null, ScoreMode.COMPLETE_NO_SCORES, 1.0f).scorer(null); - return new DisiWrapper(s); + return new DisiWrapper(s, random().nextBoolean()); } private static DocIdSetIterator randomDisi(Random r) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java index f709cfe2f752..49271ea21221 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java @@ -425,7 +425,7 @@ private static class WeightedDisiWrapper extends DisiWrapper { final float weight; WeightedDisiWrapper(Scorer scorer, float weight) { - super(scorer); + super(scorer, false); this.weight = weight; } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CoveringScorer.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CoveringScorer.java index a4662b2c679b..dfedb51ed1f4 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CoveringScorer.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CoveringScorer.java @@ -54,7 +54,7 @@ final class CoveringScorer extends Scorer { subScorers = new DisiPriorityQueue(scorers.size()); for (Scorer scorer : scorers) { - subScorers.add(new DisiWrapper(scorer)); + subScorers.add(new DisiWrapper(scorer, false)); } this.cost = scorers.stream().map(Scorer::iterator).mapToLong(DocIdSetIterator::cost).sum(); @@ -210,7 +210,7 @@ public float score() throws IOException { setTopListAndFreqIfNecessary(); double score = 0; for (DisiWrapper w = topList; w != null; w = w.next) { - score += w.scorer.score(); + score += w.scorable.score(); } return (float) score; } From 1e681a2c537f1a1cf9ebc654f75ab4f39a3dd419 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 27 Nov 2024 19:05:16 +0100 Subject: [PATCH 2/6] CHANGES --- lucene/CHANGES.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index a27d7c70e3f1..dd8a26c42577 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -110,6 +110,9 @@ Optimizations * GITHUB#14021: WANDScorer now computes scores on the fly, which helps prevent advancing "tail" clauses in many cases. (Adrien Grand) +* GITHUB#14023: Make JVM inlining decisions more predictable in our main + queries. (Adrien Grand) + Bug Fixes --------------------- * GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended From 318f1e17e5800da0a58678495a712746f75d5285 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 27 Nov 2024 19:19:07 +0100 Subject: [PATCH 3/6] Fix test --- .../org/apache/lucene/sandbox/search/CombinedFieldQuery.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java index 49271ea21221..0fbcd9e2a9aa 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/search/CombinedFieldQuery.java @@ -422,15 +422,17 @@ public boolean isCacheable(LeafReaderContext ctx) { } private static class WeightedDisiWrapper extends DisiWrapper { + final PostingsEnum postingsEnum; final float weight; WeightedDisiWrapper(Scorer scorer, float weight) { super(scorer, false); this.weight = weight; + this.postingsEnum = (PostingsEnum) scorer.iterator(); } float freq() throws IOException { - return weight * ((PostingsEnum) iterator).freq(); + return weight * postingsEnum.freq(); } } From 17f4d01102248220418697e34a5b0b2adb4210ca Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 27 Nov 2024 19:28:20 +0100 Subject: [PATCH 4/6] fix --- .../core/src/java/org/apache/lucene/search/WANDScorer.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java index a042e40d6f5e..a08b69e3fd99 100644 --- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java @@ -196,7 +196,12 @@ private static long scaleMinScore(float minScore, int scalingFactor) { } for (Scorer scorer : scorers) { - addUnpositionedLead(new DisiWrapper(scorer, scoreMode == ScoreMode.TOP_SCORES)); + // Ideally we would pass true when scoreMode == TOP_SCORES and false otherwise, but this would + // break the optimization as there could then be 3 different impls of DocIdSetIterator + // (ImpactsEnum, PostingsEnum and ). So we pass true to favor disjunctions sorted by + // descending score as opposed to non-scoring disjunctions whose minShouldMatch is greater + // than 1. + addUnpositionedLead(new DisiWrapper(scorer, true)); } this.cost = From b3a228c831d004df0dbcb8774f54bb4ba6a4c975 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 29 Nov 2024 11:17:24 +0100 Subject: [PATCH 5/6] iter --- .../org/apache/lucene/search/BooleanScorer.java | 2 +- .../lucene/search/ConjunctionBulkScorer.java | 4 ++-- .../apache/lucene/search/ConjunctionDISI.java | 6 +++--- .../org/apache/lucene/search/DisiWrapper.java | 4 ++-- .../org/apache/lucene/search/ScorerUtil.java | 17 ----------------- 5 files changed, 8 insertions(+), 25 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java index 9da813dccb85..1d05aeb59afb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java @@ -177,7 +177,7 @@ private void scoreDisiWrapperIntoBitSet(DisiWrapper w, Bits acceptDocs, int min, Bucket[] buckets = BooleanScorer.this.buckets; DocIdSetIterator it = w.iterator; - Scorable scorer = w.scorer; + Scorable scorer = w.scorable; int doc = w.doc; if (doc < min) { doc = it.advance(min); diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionBulkScorer.java index d06804bf2d37..e9bc41701a6c 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionBulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionBulkScorer.java @@ -52,8 +52,8 @@ final class ConjunctionBulkScorer extends BulkScorer { iterators.add(scorer.iterator()); } Collections.sort(iterators, Comparator.comparingLong(DocIdSetIterator::cost)); - lead1 = ScorerUtil.likelyPostingsEnum(iterators.get(0)); - lead2 = ScorerUtil.likelyPostingsEnum(iterators.get(1)); + lead1 = iterators.get(0); + lead2 = iterators.get(1); others = List.copyOf(iterators.subList(2, iterators.size())); scorable = new Scorable() { diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java index dd6fcf59b66a..4fc74b4f0871 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java @@ -157,9 +157,9 @@ private ConjunctionDISI(List iterators) { // Sort the array the first time to allow the least frequent DocsEnum to // lead the matching. CollectionUtil.timSort(iterators, (o1, o2) -> Long.compare(o1.cost(), o2.cost())); - lead1 = ScorerUtil.likelyPostingsEnum(iterators.get(0)); - lead2 = ScorerUtil.likelyPostingsEnum(iterators.get(1)); - others = iterators.subList(2, iterators.size()).toArray(DocIdSetIterator[]::new); + lead1 = iterators.get(0); + lead2 = iterators.get(1); + others = iterators.subList(2, iterators.size()).toArray(new DocIdSetIterator[0]); } private int doNext(int doc) throws IOException { diff --git a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java index 7b72a92a861f..5673f16b37ba 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisiWrapper.java @@ -51,14 +51,14 @@ public DisiWrapper(Scorer scorer, boolean impacts) { if (impacts) { this.iterator = ScorerUtil.likelyImpactsEnum(scorer.iterator()); } else { - this.iterator = ScorerUtil.likelyPostingsEnum(scorer.iterator()); + this.iterator = scorer.iterator(); } this.cost = iterator.cost(); this.doc = -1; this.twoPhaseView = scorer.twoPhaseIterator(); if (twoPhaseView != null) { - approximation = ScorerUtil.likelyPostingsEnum(twoPhaseView.approximation()); + approximation = twoPhaseView.approximation(); matchCost = twoPhaseView.matchCost(); } else { approximation = iterator; diff --git a/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java b/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java index 0d8721ceb237..1154dc1b1541 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java +++ b/lucene/core/src/java/org/apache/lucene/search/ScorerUtil.java @@ -36,7 +36,6 @@ /** Util class for Scorer related methods */ class ScorerUtil { - private static final Class DEFAULT_POSTINGS_ENUM_CLASS; private static final Class DEFAULT_IMPACTS_ENUM_CLASS; static { @@ -51,8 +50,6 @@ class ScorerUtil { if (te.seekExact(new BytesRef("value")) == false) { throw new Error(); } - PostingsEnum pe = te.postings(null, PostingsEnum.NONE); - DEFAULT_POSTINGS_ENUM_CLASS = pe.getClass(); ImpactsEnum ie = te.impacts(PostingsEnum.FREQS); DEFAULT_IMPACTS_ENUM_CLASS = ie.getClass(); } @@ -100,20 +97,6 @@ static DocIdSetIterator likelyImpactsEnum(DocIdSetIterator it) { return it; } - /** - * Optimize a {@link DocIdSetIterator} for the case when it is likely implemented via a {@link - * PostingsEnum}. This return method only has 2 possible return types, which helps make sure that - * calls to {@link DocIdSetIterator#nextDoc()} and {@link DocIdSetIterator#advance(int)} are - * bimorphic at most and candidate for inlining. - */ - static DocIdSetIterator likelyPostingsEnum(DocIdSetIterator it) { - if (it.getClass() != DEFAULT_POSTINGS_ENUM_CLASS - && it.getClass() != FilterDocIdSetIterator.class) { - it = new FilterDocIdSetIterator(it); - } - return it; - } - /** * Optimize a {@link Scorable} for the case when it is likely implemented via a {@link * TermScorer}. This return method only has 2 possible return types, which helps make sure that From 2122dbed778f224c04c7e285a4dc44e765fdc641 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 29 Nov 2024 13:01:27 +0100 Subject: [PATCH 6/6] iter --- .../org/apache/lucene/search/BlockMaxConjunctionScorer.java | 6 +++++- .../src/java/org/apache/lucene/search/BooleanScorer.java | 2 +- .../org/apache/lucene/search/FilterDocIdSetIterator.java | 1 + .../java/org/apache/lucene/search/MaxScoreBulkScorer.java | 2 +- .../core/src/java/org/apache/lucene/search/WANDScorer.java | 2 +- 5 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java index 76788185faa1..7cc86b7cf83a 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionScorer.java @@ -29,6 +29,7 @@ */ final class BlockMaxConjunctionScorer extends Scorer { final Scorer[] scorers; + final Scorable[] scorables; final DocIdSetIterator[] approximations; final TwoPhaseIterator[] twoPhases; float minScore; @@ -38,6 +39,8 @@ final class BlockMaxConjunctionScorer extends Scorer { this.scorers = scorersList.toArray(new Scorer[scorersList.size()]); // Sort scorer by cost Arrays.sort(this.scorers, Comparator.comparingLong(s -> s.iterator().cost())); + this.scorables = + Arrays.stream(scorers).map(ScorerUtil::likelyTermScorer).toArray(Scorable[]::new); this.approximations = new DocIdSetIterator[scorers.length]; List twoPhaseList = new ArrayList<>(); @@ -50,6 +53,7 @@ final class BlockMaxConjunctionScorer extends Scorer { } else { approximations[i] = scorer.iterator(); } + approximations[i] = ScorerUtil.likelyImpactsEnum(approximations[i]); scorer.advanceShallow(0); } this.twoPhases = twoPhaseList.toArray(new TwoPhaseIterator[twoPhaseList.size()]); @@ -207,7 +211,7 @@ public int docID() { @Override public float score() throws IOException { double score = 0; - for (Scorer scorer : scorers) { + for (Scorable scorer : scorables) { score += scorer.score(); } return (float) score; diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java index 1d05aeb59afb..462396e266ea 100644 --- a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java @@ -300,7 +300,7 @@ private void scoreWindowSingleScorer( if (doc < windowMin) { doc = it.advance(windowMin); } - collector.setScorer(w.scorable); + collector.setScorer(w.scorer); for (; doc < end; doc = it.nextDoc()) { if (acceptDocs == null || acceptDocs.get(doc)) { collector.collect(doc); diff --git a/lucene/core/src/java/org/apache/lucene/search/FilterDocIdSetIterator.java b/lucene/core/src/java/org/apache/lucene/search/FilterDocIdSetIterator.java index 4c40180d2dac..2078bf502b87 100644 --- a/lucene/core/src/java/org/apache/lucene/search/FilterDocIdSetIterator.java +++ b/lucene/core/src/java/org/apache/lucene/search/FilterDocIdSetIterator.java @@ -21,6 +21,7 @@ /** Wrapper around a {@link DocIdSetIterator}. */ public class FilterDocIdSetIterator extends DocIdSetIterator { + /** Wrapped instance. */ protected final DocIdSetIterator in; /** Sole constructor. */ diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java index 970aadc344b6..93dd1ea91e31 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java @@ -53,7 +53,7 @@ final class MaxScoreBulkScorer extends BulkScorer { MaxScoreBulkScorer(int maxDoc, List scorers, Scorer filter) throws IOException { this.maxDoc = maxDoc; - this.filter = filter == null ? null : new DisiWrapper(filter); + this.filter = filter == null ? null : new DisiWrapper(filter, false); allScorers = new DisiWrapper[scorers.size()]; scratch = new DisiWrapper[allScorers.length]; int i = 0; diff --git a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java index a08b69e3fd99..897713dbe17d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/WANDScorer.java @@ -271,7 +271,7 @@ public final Collection getChildren() throws IOException { List matchingChildren = new ArrayList<>(); advanceAllTail(); for (DisiWrapper s = lead; s != null; s = s.next) { - matchingChildren.add(new ChildScorable(s.scorable, "SHOULD")); + matchingChildren.add(new ChildScorable(s.scorer, "SHOULD")); } return matchingChildren; }