Skip to content

Commit

Permalink
Introduce a BulkScorer for DisjunctionMaxQuery. (#14040)
Browse files Browse the repository at this point in the history
This introduces a bulk scorer for `DisjunctionMaxQuery` that delegates to the
bulk scorers of the query clauses. This helps make the performance of top-level
`DisjunctionMaxQuery` better, especially when its clauses have optimized bulk
scorers themselves (e.g. disjunctions).
  • Loading branch information
jpountz authored Dec 6, 2024
1 parent 8103f2a commit c88f933
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 0 deletions.
3 changes: 3 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,9 @@ Optimizations
* GITHUB#14032: Speed up PostingsEnum when positions are requested.
(Adrien Grand)

* GITHUB#14040: Specialized top-level DisjunctionMaxQuery evaluation when the
tie break multiplier is 0. (Adrien Grand)

Bug Fixes
---------------------
* GITHUB#13832: Fixed an issue where the DefaultPassageFormatter.format method did not format passages as intended
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PriorityQueue;

/** Bulk scorer for {@link DisjunctionMaxQuery} when the tie-break multiplier is zero. */
final class DisjunctionMaxBulkScorer extends BulkScorer {

// Same window size as BooleanScorer
private static final int WINDOW_SIZE = 4096;

private static class BulkScorerAndNext {
public final BulkScorer scorer;
public int next = 0;

BulkScorerAndNext(BulkScorer scorer) {
this.scorer = Objects.requireNonNull(scorer);
}
}

// WINDOW_SIZE + 1 to ease iteration on the bit set
private final FixedBitSet windowMatches = new FixedBitSet(WINDOW_SIZE + 1);
private final float[] windowScores = new float[WINDOW_SIZE];
private final PriorityQueue<BulkScorerAndNext> scorers;
private final SimpleScorable topLevelScorable = new SimpleScorable();

DisjunctionMaxBulkScorer(List<BulkScorer> scorers) {
if (scorers.size() < 2) {
throw new IllegalArgumentException();
}
this.scorers =
new PriorityQueue<>(scorers.size()) {
@Override
protected boolean lessThan(BulkScorerAndNext a, BulkScorerAndNext b) {
return a.next < b.next;
}
};
for (BulkScorer scorer : scorers) {
this.scorers.add(new BulkScorerAndNext(scorer));
}
}

@Override
public int score(LeafCollector collector, Bits acceptDocs, int min, int max) throws IOException {
BulkScorerAndNext top = scorers.top();

while (top.next < max) {
final int windowMin = Math.max(top.next, min);
final int windowMax = (int) Math.min(max, (long) windowMin + WINDOW_SIZE);

// First compute matches / scores in the window
do {
top.next =
top.scorer.score(
new LeafCollector() {

private Scorable scorer;

@Override
public void setScorer(Scorable scorer) throws IOException {
this.scorer = scorer;
if (topLevelScorable.minCompetitiveScore != 0f) {
scorer.setMinCompetitiveScore(topLevelScorable.minCompetitiveScore);
}
}

@Override
public void collect(int doc) throws IOException {
final int delta = doc - windowMin;
windowMatches.set(doc - windowMin);
windowScores[delta] = Math.max(windowScores[delta], scorer.score());
}
},
acceptDocs,
windowMin,
windowMax);
top = scorers.updateTop();
} while (top.next < windowMax);

// Then replay
collector.setScorer(topLevelScorable);
for (int windowDoc = windowMatches.nextSetBit(0);
windowDoc != DocIdSetIterator.NO_MORE_DOCS;
windowDoc = windowMatches.nextSetBit(windowDoc + 1)) {
int doc = windowMin + windowDoc;
topLevelScorable.score = windowScores[windowDoc];
collector.collect(doc);
}

// Finally clean up state
windowMatches.clear();
Arrays.fill(windowScores, 0f);
}

return top.next;
}

@Override
public long cost() {
long cost = 0;
for (BulkScorerAndNext scorer : scorers) {
cost += scorer.scorer.cost();
}
return cost;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,18 @@ public Scorer get(long leadCost) throws IOException {
return new DisjunctionMaxScorer(tieBreakerMultiplier, scorers, scoreMode);
}

@Override
public BulkScorer bulkScorer() throws IOException {
if (tieBreakerMultiplier == 0f) {
List<BulkScorer> scorers = new ArrayList<>();
for (ScorerSupplier ss : scorerSuppliers) {
scorers.add(ss.bulkScorer());
}
return new DisjunctionMaxBulkScorer(scorers);
}
return super.bulkScorer();
}

@Override
public long cost() {
if (cost == -1) {
Expand Down
48 changes: 48 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/SimpleScorable.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

import java.io.IOException;

/** Simplest implementation of {@link Scorable}, implemented via simple getters and setters. */
final class SimpleScorable extends Scorable {
float score;
float minCompetitiveScore;

/** Sole constructor. */
public SimpleScorable() {}

@Override
public float score() {
return score;
}

/** Set the score. */
public void setScore(float score) {
this.score = score;
}

/** Get the min competitive score. */
public float minCompetitiveScore() {
return minCompetitiveScore;
}

@Override
public void setMinCompetitiveScore(float minScore) throws IOException {
minCompetitiveScore = minScore;
}
}

0 comments on commit c88f933

Please sign in to comment.