Skip to content

Commit

Permalink
Disable dynamic pruning on unindexed fields. (#107194)
Browse files Browse the repository at this point in the history
In order to know whether it can apply dynamic pruning using the points index,
Lucene simply looks at whether a field has points. Unfortunately, this doesn't
work well with our support for archive indexes, where numeric/date fields
report that they have points, but they only support metadata operations on
these points (min/max values, doc count), with the goal of quickly filtering
out such archive indexes during the `can_match` phase.

In order to address this discrepancy, dynamic pruning is now disabled when
mappings report that a field is not indexed. This works because archive indexes
automatically set `index: false` to make sure that filters run on doc values
and not points. However, this is not a great fix as this increases our reliance
on disabling dynamic pruning, which is currently marked as deprecated and
scheduled for removal in the next Lucene major. So we'll need to either add it
back to Lucene or find another approach.

Closes #107168
  • Loading branch information
jpountz authored Apr 9, 2024
1 parent c50fcb9 commit 62f19e3
Show file tree
Hide file tree
Showing 20 changed files with 160 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,8 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
valuesSourceType,
(dv, n) -> {
throw new UnsupportedOperationException();
}
},
isIndexed()
).build(cache, breakerService);
return new ScaledFloatIndexFieldData(scaledValues, scalingFactor, ScaledFloatDocValuesField::new);
};
Expand Down Expand Up @@ -608,6 +609,11 @@ protected boolean sortRequiresCustomComparator() {
return true;
}

@Override
protected boolean isIndexed() {
return false; // We don't know how to take advantage of the index with half floats anyway
}

@Override
public NumericType getNumericType() {
/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public String typeName() {
@Override
public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext) {
failIfNoDocValues();
return new SortedNumericIndexFieldData.Builder(name(), NumericType.LONG, Murmur3DocValueField::new);
return new SortedNumericIndexFieldData.Builder(name(), NumericType.LONG, Murmur3DocValueField::new, isIndexed());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ protected boolean sortRequiresCustomComparator() {
return true;
}

@Override
protected boolean isIndexed() {
return false;
}

public static class BooleanScriptLeafFieldData extends LeafLongFieldData {
private final BooleanScriptDocValues booleanScriptDocValues;
protected final ToScriptFieldFactory<SortedNumericDocValues> toScriptFieldFactory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ protected boolean sortRequiresCustomComparator() {
return true;
}

@Override
protected boolean isIndexed() {
return false;
}

public static class DateScriptLeafFieldData extends LeafLongFieldData {
private final LongScriptDocValues longScriptDocValues;
protected final ToScriptFieldFactory<SortedNumericDocValues> toScriptFieldFactory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ protected boolean sortRequiresCustomComparator() {
return true;
}

@Override
protected boolean isIndexed() {
return false;
}

public static class DoubleScriptLeafFieldData extends LeafDoubleFieldData {
private final DoubleScriptDocValues doubleScriptDocValues;
protected final ToScriptFieldFactory<SortedNumericDoubleValues> toScriptFieldFactory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ public final SortField sortField(
case LONG:
case DOUBLE:
// longs, doubles and dates use the same type for doc-values and points.
sortField.setOptimizeSortWithPoints(isIndexed());
break;

default:
Expand All @@ -132,12 +133,18 @@ public final SortField sortField(
}

/**
* Does {@link #sortField} require a custom comparator because of the way
* the data is stored in doc values ({@code true}) or are the docs values
* stored such that they can be sorted without decoding ({@code false}).
* Should sorting use a custom comparator source vs. rely on a Lucene {@link SortField}. Using a Lucene {@link SortField} when possible
* is important because index sorting cannot be configured with a custom comparator, and because it gives better performance by
* dynamically pruning irrelevant hits. On the other hand, Lucene {@link SortField}s are less flexible and make stronger assumptions
* about how the data is indexed. Therefore, they cannot be used in all cases.
*/
protected abstract boolean sortRequiresCustomComparator();

/**
* Return true if, and only if the field is indexed with points that match the content of doc values.
*/
protected abstract boolean isIndexed();

@Override
public final SortField sortField(Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
return sortField(getNumericType(), missingValue, sortMode, nested, reverse);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ protected boolean sortRequiresCustomComparator() {
return true;
}

@Override
protected boolean isIndexed() {
return false;
}

public static class LongScriptLeafFieldData extends LeafLongFieldData {
private final LongScriptDocValues longScriptDocValues;
protected final ToScriptFieldFactory<SortedNumericDocValues> toScriptFieldFactory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,41 +42,47 @@ public static class Builder implements IndexFieldData.Builder {
private final NumericType numericType;
private final ValuesSourceType valuesSourceType;
protected final ToScriptFieldFactory<SortedNumericDoubleValues> toScriptFieldFactory;
private final boolean indexed;

public Builder(
String name,
NumericType numericType,
ValuesSourceType valuesSourceType,
ToScriptFieldFactory<SortedNumericDoubleValues> toScriptFieldFactory
ToScriptFieldFactory<SortedNumericDoubleValues> toScriptFieldFactory,
boolean indexed
) {
this.name = name;
this.numericType = numericType;
this.valuesSourceType = valuesSourceType;
this.toScriptFieldFactory = toScriptFieldFactory;
this.indexed = indexed;
}

@Override
public SortedDoublesIndexFieldData build(IndexFieldDataCache cache, CircuitBreakerService breakerService) {
return new SortedDoublesIndexFieldData(name, numericType, valuesSourceType, toScriptFieldFactory);
return new SortedDoublesIndexFieldData(name, numericType, valuesSourceType, toScriptFieldFactory, indexed);
}
}

private final NumericType numericType;
protected final String fieldName;
protected final ValuesSourceType valuesSourceType;
protected final ToScriptFieldFactory<SortedNumericDoubleValues> toScriptFieldFactory;
protected final boolean indexed;

public SortedDoublesIndexFieldData(
String fieldName,
NumericType numericType,
ValuesSourceType valuesSourceType,
ToScriptFieldFactory<SortedNumericDoubleValues> toScriptFieldFactory
ToScriptFieldFactory<SortedNumericDoubleValues> toScriptFieldFactory,
boolean indexed
) {
this.fieldName = fieldName;
this.numericType = Objects.requireNonNull(numericType);
assert this.numericType.isFloatingPoint();
this.valuesSourceType = valuesSourceType;
this.toScriptFieldFactory = toScriptFieldFactory;
this.indexed = indexed;
}

@Override
Expand All @@ -94,6 +100,11 @@ protected boolean sortRequiresCustomComparator() {
return numericType == NumericType.HALF_FLOAT;
}

@Override
public boolean isIndexed() {
return indexed;
}

@Override
public NumericType getNumericType() {
return numericType;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,45 +42,56 @@ public static class Builder implements IndexFieldData.Builder {
private final NumericType numericType;
private final ValuesSourceType valuesSourceType;
protected final ToScriptFieldFactory<SortedNumericDocValues> toScriptFieldFactory;
private final boolean indexed;

public Builder(String name, NumericType numericType, ToScriptFieldFactory<SortedNumericDocValues> toScriptFieldFactory) {
this(name, numericType, numericType.getValuesSourceType(), toScriptFieldFactory);
public Builder(
String name,
NumericType numericType,
ToScriptFieldFactory<SortedNumericDocValues> toScriptFieldFactory,
boolean indexed
) {
this(name, numericType, numericType.getValuesSourceType(), toScriptFieldFactory, indexed);
}

public Builder(
String name,
NumericType numericType,
ValuesSourceType valuesSourceType,
ToScriptFieldFactory<SortedNumericDocValues> toScriptFieldFactory
ToScriptFieldFactory<SortedNumericDocValues> toScriptFieldFactory,
boolean indexed
) {
this.name = name;
this.numericType = numericType;
this.valuesSourceType = valuesSourceType;
this.toScriptFieldFactory = toScriptFieldFactory;
this.indexed = indexed;
}

@Override
public SortedNumericIndexFieldData build(IndexFieldDataCache cache, CircuitBreakerService breakerService) {
return new SortedNumericIndexFieldData(name, numericType, valuesSourceType, toScriptFieldFactory);
return new SortedNumericIndexFieldData(name, numericType, valuesSourceType, toScriptFieldFactory, indexed);
}
}

private final NumericType numericType;
protected final String fieldName;
protected final ValuesSourceType valuesSourceType;
protected final ToScriptFieldFactory<SortedNumericDocValues> toScriptFieldFactory;
protected final boolean indexed;

public SortedNumericIndexFieldData(
String fieldName,
NumericType numericType,
ValuesSourceType valuesSourceType,
ToScriptFieldFactory<SortedNumericDocValues> toScriptFieldFactory
ToScriptFieldFactory<SortedNumericDocValues> toScriptFieldFactory,
boolean indexed
) {
this.fieldName = fieldName;
this.numericType = Objects.requireNonNull(numericType);
assert this.numericType.isFloatingPoint() == false;
this.valuesSourceType = valuesSourceType;
this.toScriptFieldFactory = toScriptFieldFactory;
this.indexed = indexed;
}

@Override
Expand All @@ -98,6 +109,11 @@ protected boolean sortRequiresCustomComparator() {
return false;
}

@Override
public boolean isIndexed() {
return indexed;
}

@Override
protected XFieldComparatorSource dateComparatorSource(Object missingValue, MultiValueMode sortMode, Nested nested) {
if (numericType == NumericType.DATE_NANOSECONDS) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,9 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
}

if ((operation == FielddataOperation.SEARCH || operation == FielddataOperation.SCRIPT) && hasDocValues()) {
return new SortedNumericIndexFieldData.Builder(name(), NumericType.BOOLEAN, BooleanDocValuesField::new);
// boolean fields are indexed, but not with points
boolean indexed = false;
return new SortedNumericIndexFieldData.Builder(name(), NumericType.BOOLEAN, BooleanDocValuesField::new, indexed);
}

if (operation == FielddataOperation.SCRIPT) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -795,7 +795,8 @@ public IndexFieldData.Builder fielddataBuilder(FieldDataContext fieldDataContext
return new SortedNumericIndexFieldData.Builder(
name(),
resolution.numericType(),
resolution.getDefaultToScriptFieldFactory()
resolution.getDefaultToScriptFieldFactory(),
isIndexed()
);
}

Expand Down
Loading

0 comments on commit 62f19e3

Please sign in to comment.