Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip docs with Docvalues in NumericLeafComparator #12405

Merged
merged 25 commits into from
Nov 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,8 @@ Optimizations

* GITHUB#11903: Faster sort on high-cardinality string fields. (Adrien Grand)

* GITHUB#12381: Skip docs with DocValues in NumericLeafComparator. (Lu Xugang, Adrien Grand)

Changes in runtime behavior
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.SimpleFieldComparator;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
Expand All @@ -44,7 +45,7 @@ public FeatureSortField(String field, String featureName) {
}

@Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new FeatureComparator(numHits, getField(), featureName);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import org.apache.lucene.geo.GeoUtils;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.SortField;

/** Sorts by distance from an origin location. */
Expand All @@ -38,7 +39,7 @@ final class LatLonPointSortField extends SortField {
}

@Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new LatLonPointDistanceComparator(getField(), latitude, longitude, numHits);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.lucene.document;

import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.SortField;

/** Sorts by distance from an origin location. */
Expand All @@ -35,7 +36,7 @@ final class XYPointSortField extends SortField {
}

@Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new XYPointDistanceComparator(getField(), x, y, numHits);
}

Expand Down
3 changes: 2 additions & 1 deletion lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
Expand Down Expand Up @@ -1119,7 +1120,7 @@ public static Status.IndexSortStatus testSort(

for (int i = 0; i < fields.length; i++) {
reverseMul[i] = fields[i].getReverse() ? -1 : 1;
comparators[i] = fields[i].getComparator(1, false).getLeafComparator(readerContext);
comparators[i] = fields[i].getComparator(1, Pruning.NONE).getLeafComparator(readerContext);
}

int maxDoc = reader.maxDoc();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -528,8 +528,8 @@ void setMissingValue(double missingValue) {

@Override
public FieldComparator<Double> newComparator(
String fieldname, int numHits, boolean enableSkipping, boolean reversed) {
return new DoubleComparator(numHits, fieldname, missingValue, reversed, false) {
String fieldname, int numHits, Pruning pruning, boolean reversed) {
return new DoubleComparator(numHits, fieldname, missingValue, reversed, Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
DoubleValuesHolder holder = new DoubleValuesHolder();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,5 @@ public abstract class FieldComparatorSource {
* @return FieldComparator.
*/
public abstract FieldComparator<?> newComparator(
String fieldname, int numHits, boolean enableSkipping, boolean reversed);
String fieldname, int numHits, Pruning pruning, boolean reversed);
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ private FieldValueHitQueue(SortField[] fields, int size) {
// need to check it again.

// All these are required by this class's API - need to return arrays.
// Therefore even in the case of a single comparator, create an array
// Therefore, even in the case of a single comparator, create an array
// anyway.
this.fields = fields;
int numComparators = fields.length;
Expand All @@ -134,7 +134,12 @@ private FieldValueHitQueue(SortField[] fields, int size) {
for (int i = 0; i < numComparators; ++i) {
SortField field = fields[i];
reverseMul[i] = field.reverse ? -1 : 1;
comparators[i] = field.getComparator(size, i == 0);
comparators[i] =
field.getComparator(
size,
i == 0
? (numComparators > 1 ? Pruning.GREATER_THAN : Pruning.GREATER_THAN_OR_EQUAL_TO)
: Pruning.NONE);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,7 @@ private static ValueComparator loadComparator(
throws IOException {
@SuppressWarnings("unchecked")
FieldComparator<Number> fieldComparator =
(FieldComparator<Number>) sortField.getComparator(1, false);
(FieldComparator<Number>) sortField.getComparator(1, Pruning.NONE);
if (type == Type.INT) {
fieldComparator.setTopValue((int) topValue);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,8 @@ void setMissingValue(long missingValue) {

@Override
public FieldComparator<Long> newComparator(
String fieldname, int numHits, boolean enableSkipping, boolean reversed) {
return new LongComparator(numHits, fieldname, missingValue, reversed, false) {
String fieldname, int numHits, Pruning pruning, boolean reversed) {
return new LongComparator(numHits, fieldname, missingValue, reversed, Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context) throws IOException {
LongValuesHolder holder = new LongValuesHolder();
Expand Down
33 changes: 33 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/Pruning.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

/** Controls {@link LeafFieldComparator} how to skip documents */
public enum Pruning {
/** Not allowed to skip documents. */
NONE,
/**
* Allowed to skip documents that compare strictly better than the top value, or strictly worse
* than the bottom value.
*/
GREATER_THAN,
/**
* Allowed to skip documents that compare better than the top value, or worse than or equal to the
* bottom value.
*/
GREATER_THAN_OR_EQUAL_TO
}
20 changes: 9 additions & 11 deletions lucene/core/src/java/org/apache/lucene/search/SortField.java
Original file line number Diff line number Diff line change
Expand Up @@ -500,50 +500,48 @@ public Comparator<BytesRef> getBytesComparator() {
*
* @lucene.experimental
* @param numHits number of top hits the queue will store
* @param enableSkipping true if the comparator can skip documents via {@link
* @param pruning controls how can the comparator to skip documents via {@link
* LeafFieldComparator#competitiveIterator()}
* @return {@link FieldComparator} to use when sorting
*/
public FieldComparator<?> getComparator(final int numHits, boolean enableSkipping) {
public FieldComparator<?> getComparator(final int numHits, Pruning pruning) {
final FieldComparator<?> fieldComparator;
switch (type) {
case SCORE:
fieldComparator = new FieldComparator.RelevanceComparator(numHits);
break;

case DOC:
fieldComparator = new DocComparator(numHits, reverse, enableSkipping);
fieldComparator = new DocComparator(numHits, reverse, pruning);
break;

case INT:
fieldComparator =
new IntComparator(numHits, field, (Integer) missingValue, reverse, enableSkipping);
new IntComparator(numHits, field, (Integer) missingValue, reverse, pruning);
break;

case FLOAT:
fieldComparator =
new FloatComparator(numHits, field, (Float) missingValue, reverse, enableSkipping);
new FloatComparator(numHits, field, (Float) missingValue, reverse, pruning);
break;

case LONG:
fieldComparator =
new LongComparator(numHits, field, (Long) missingValue, reverse, enableSkipping);
fieldComparator = new LongComparator(numHits, field, (Long) missingValue, reverse, pruning);
break;

case DOUBLE:
fieldComparator =
new DoubleComparator(numHits, field, (Double) missingValue, reverse, enableSkipping);
new DoubleComparator(numHits, field, (Double) missingValue, reverse, pruning);
break;

case CUSTOM:
assert comparatorSource != null;
fieldComparator = comparatorSource.newComparator(field, numHits, enableSkipping, reverse);
fieldComparator = comparatorSource.newComparator(field, numHits, pruning, reverse);
break;

case STRING:
fieldComparator =
new TermOrdValComparator(
numHits, field, missingValue == STRING_LAST, reverse, enableSkipping);
new TermOrdValComparator(numHits, field, missingValue == STRING_LAST, reverse, pruning);
break;

case STRING_VAL:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ public void setMissingValue(Object missingValue) {
}

@Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
final FieldComparator<?> fieldComparator;
// we can use sort optimization with points if selector is MIN or MAX,
// because we can still build successful iterator over points in this case.
Expand All @@ -255,7 +255,7 @@ public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
getField(),
(Integer) missingValue,
reverse,
enableSkipping && isMinOrMax) {
isMinOrMax ? pruning : Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context)
throws IOException {
Expand All @@ -273,7 +273,11 @@ protected NumericDocValues getNumericDocValues(
case FLOAT:
fieldComparator =
new FloatComparator(
numHits, getField(), (Float) missingValue, reverse, enableSkipping && isMinOrMax) {
numHits,
getField(),
(Float) missingValue,
reverse,
isMinOrMax ? pruning : Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context)
throws IOException {
Expand All @@ -291,7 +295,11 @@ protected NumericDocValues getNumericDocValues(
case LONG:
fieldComparator =
new LongComparator(
numHits, getField(), (Long) missingValue, reverse, enableSkipping && isMinOrMax) {
numHits,
getField(),
(Long) missingValue,
reverse,
isMinOrMax ? pruning : Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context)
throws IOException {
Expand All @@ -309,7 +317,11 @@ protected NumericDocValues getNumericDocValues(
case DOUBLE:
fieldComparator =
new DoubleComparator(
numHits, getField(), (Double) missingValue, reverse, enableSkipping && isMinOrMax) {
numHits,
getField(),
(Double) missingValue,
reverse,
isMinOrMax ? pruning : Pruning.NONE) {
@Override
public LeafFieldComparator getLeafComparator(LeafReaderContext context)
throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,10 @@ public void setMissingValue(Object missingValue) {
}

@Override
public FieldComparator<?> getComparator(int numHits, boolean enableSkipping) {
boolean finalEnableSkipping = enableSkipping && getOptimizeSortWithIndexedData();
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
Pruning finalPruning = getOptimizeSortWithIndexedData() ? pruning : Pruning.NONE;
return new TermOrdValComparator(
numHits, getField(), missingValue == STRING_LAST, reverse, finalEnableSkipping) {
numHits, getField(), missingValue == STRING_LAST, reverse, finalPruning) {
@Override
protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field)
throws IOException {
Expand Down
2 changes: 1 addition & 1 deletion lucene/core/src/java/org/apache/lucene/search/TopDocs.java
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ public MergeSortQueue(Sort sort, TopDocs[] shardHits, Comparator<ScoreDoc> tieBr
reverseMul = new int[sortFields.length];
for (int compIDX = 0; compIDX < sortFields.length; compIDX++) {
final SortField sortField = sortFields[compIDX];
comparators[compIDX] = sortField.getComparator(1, compIDX == 0);
comparators[compIDX] = sortField.getComparator(1, Pruning.NONE);
reverseMul[compIDX] = sortField.getReverse() ? -1 : 1;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Scorable;

/** Comparator that sorts by asc _doc */
Expand All @@ -35,10 +36,10 @@ public class DocComparator extends FieldComparator<Integer> {
private boolean hitsThresholdReached;

/** Creates a new comparator based on document ids for {@code numHits} */
public DocComparator(int numHits, boolean reverse, boolean enableSkipping) {
public DocComparator(int numHits, boolean reverse, Pruning pruning) {
this.docIDs = new int[numHits];
// skipping functionality is enabled if we are sorting by _doc in asc order as a primary sort
this.enableSkipping = (reverse == false && enableSkipping);
this.enableSkipping = (reverse == false && pruning != Pruning.NONE);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;

/**
* Comparator based on {@link Double#compare} for {@code numHits}. This comparator provides a
Expand All @@ -32,8 +33,8 @@ public class DoubleComparator extends NumericComparator<Double> {
protected double bottom;

public DoubleComparator(
int numHits, String field, Double missingValue, boolean reverse, boolean enableSkipping) {
super(field, missingValue != null ? missingValue : 0.0, reverse, enableSkipping, Double.BYTES);
int numHits, String field, Double missingValue, boolean reverse, Pruning pruning) {
super(field, missingValue != null ? missingValue : 0.0, reverse, pruning, Double.BYTES);
values = new double[numHits];
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;

/**
* Comparator based on {@link Float#compare} for {@code numHits}. This comparator provides a
Expand All @@ -32,8 +33,8 @@ public class FloatComparator extends NumericComparator<Float> {
protected float bottom;

public FloatComparator(
int numHits, String field, Float missingValue, boolean reverse, boolean enableSkipping) {
super(field, missingValue != null ? missingValue : 0.0f, reverse, enableSkipping, Float.BYTES);
int numHits, String field, Float missingValue, boolean reverse, Pruning pruning) {
super(field, missingValue != null ? missingValue : 0.0f, reverse, pruning, Float.BYTES);
values = new float[numHits];
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;

/**
* Comparator based on {@link Integer#compare} for {@code numHits}. This comparator provides a
Expand All @@ -32,8 +33,8 @@ public class IntComparator extends NumericComparator<Integer> {
protected int bottom;

public IntComparator(
int numHits, String field, Integer missingValue, boolean reverse, boolean enableSkipping) {
super(field, missingValue != null ? missingValue : 0, reverse, enableSkipping, Integer.BYTES);
int numHits, String field, Integer missingValue, boolean reverse, Pruning pruning) {
super(field, missingValue != null ? missingValue : 0, reverse, pruning, Integer.BYTES);
values = new int[numHits];
}

Expand Down
Loading
Loading