-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Introduce a BulkScorer for DisjunctionMaxQuery. (#14040)
This introduces a bulk scorer for `DisjunctionMaxQuery` that delegates to the bulk scorers of the query clauses. This helps make the performance of top-level `DisjunctionMaxQuery` better, especially when its clauses have optimized bulk scorers themselves (e.g. disjunctions).
- Loading branch information
Showing
4 changed files
with
190 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
127 changes: 127 additions & 0 deletions
127
lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxBulkScorer.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.lucene.search; | ||
|
||
import java.io.IOException; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Objects; | ||
import org.apache.lucene.util.Bits; | ||
import org.apache.lucene.util.FixedBitSet; | ||
import org.apache.lucene.util.PriorityQueue; | ||
|
||
/** Bulk scorer for {@link DisjunctionMaxQuery} when the tie-break multiplier is zero. */ | ||
final class DisjunctionMaxBulkScorer extends BulkScorer { | ||
|
||
// Same window size as BooleanScorer | ||
private static final int WINDOW_SIZE = 4096; | ||
|
||
private static class BulkScorerAndNext { | ||
public final BulkScorer scorer; | ||
public int next = 0; | ||
|
||
BulkScorerAndNext(BulkScorer scorer) { | ||
this.scorer = Objects.requireNonNull(scorer); | ||
} | ||
} | ||
|
||
// WINDOW_SIZE + 1 to ease iteration on the bit set | ||
private final FixedBitSet windowMatches = new FixedBitSet(WINDOW_SIZE + 1); | ||
private final float[] windowScores = new float[WINDOW_SIZE]; | ||
private final PriorityQueue<BulkScorerAndNext> scorers; | ||
private final SimpleScorable topLevelScorable = new SimpleScorable(); | ||
|
||
DisjunctionMaxBulkScorer(List<BulkScorer> scorers) { | ||
if (scorers.size() < 2) { | ||
throw new IllegalArgumentException(); | ||
} | ||
this.scorers = | ||
new PriorityQueue<>(scorers.size()) { | ||
@Override | ||
protected boolean lessThan(BulkScorerAndNext a, BulkScorerAndNext b) { | ||
return a.next < b.next; | ||
} | ||
}; | ||
for (BulkScorer scorer : scorers) { | ||
this.scorers.add(new BulkScorerAndNext(scorer)); | ||
} | ||
} | ||
|
||
@Override | ||
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException { | ||
BulkScorerAndNext top = scorers.top(); | ||
|
||
while (top.next < max) { | ||
final int windowMin = Math.max(top.next, min); | ||
final int windowMax = (int) Math.min(max, (long) windowMin + WINDOW_SIZE); | ||
|
||
// First compute matches / scores in the window | ||
do { | ||
top.next = | ||
top.scorer.score( | ||
new LeafCollector() { | ||
|
||
private Scorable scorer; | ||
|
||
@Override | ||
public void setScorer(Scorable scorer) throws IOException { | ||
this.scorer = scorer; | ||
if (topLevelScorable.minCompetitiveScore != 0f) { | ||
scorer.setMinCompetitiveScore(topLevelScorable.minCompetitiveScore); | ||
} | ||
} | ||
|
||
@Override | ||
public void collect(int doc) throws IOException { | ||
final int delta = doc - windowMin; | ||
windowMatches.set(doc - windowMin); | ||
windowScores[delta] = Math.max(windowScores[delta], scorer.score()); | ||
} | ||
}, | ||
acceptDocs, | ||
windowMin, | ||
windowMax); | ||
top = scorers.updateTop(); | ||
} while (top.next < windowMax); | ||
|
||
// Then replay | ||
collector.setScorer(topLevelScorable); | ||
for (int windowDoc = windowMatches.nextSetBit(0); | ||
windowDoc != DocIdSetIterator.NO_MORE_DOCS; | ||
windowDoc = windowMatches.nextSetBit(windowDoc + 1)) { | ||
int doc = windowMin + windowDoc; | ||
topLevelScorable.score = windowScores[windowDoc]; | ||
collector.collect(doc); | ||
} | ||
|
||
// Finally clean up state | ||
windowMatches.clear(); | ||
Arrays.fill(windowScores, 0f); | ||
} | ||
|
||
return top.next; | ||
} | ||
|
||
@Override | ||
public long cost() { | ||
long cost = 0; | ||
for (BulkScorerAndNext scorer : scorers) { | ||
cost += scorer.scorer.cost(); | ||
} | ||
return cost; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
lucene/core/src/java/org/apache/lucene/search/SimpleScorable.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.lucene.search; | ||
|
||
import java.io.IOException; | ||
|
||
/** Simplest implementation of {@link Scorable}, implemented via simple getters and setters. */ | ||
final class SimpleScorable extends Scorable { | ||
float score; | ||
float minCompetitiveScore; | ||
|
||
/** Sole constructor. */ | ||
public SimpleScorable() {} | ||
|
||
@Override | ||
public float score() { | ||
return score; | ||
} | ||
|
||
/** Set the score. */ | ||
public void setScore(float score) { | ||
this.score = score; | ||
} | ||
|
||
/** Get the min competitive score. */ | ||
public float minCompetitiveScore() { | ||
return minCompetitiveScore; | ||
} | ||
|
||
@Override | ||
public void setMinCompetitiveScore(float minScore) throws IOException { | ||
minCompetitiveScore = minScore; | ||
} | ||
} |