-
Notifications
You must be signed in to change notification settings - Fork 24.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Query range fields by doc values when they are expected to be more ef…
…ficient than points. * Enable doc values for range fields by default. * Store ranges in a binary format that supports multi-valued fields. * Added BinaryDocValuesRangeQuery that can query ranges that have been encoded into a binary doc values field. * Wrap range queries on a range field in IndexOrDocValuesQuery query. Closes #24314
- Loading branch information
Showing
13 changed files
with
1,679 additions
and
62 deletions.
There are no files selected for viewing
164 changes: 164 additions & 0 deletions
164
core/src/main/java/org/apache/lucene/queries/BinaryDocValuesRangeQuery.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.lucene.queries; | ||
|
||
import org.apache.lucene.index.BinaryDocValues; | ||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.search.ConstantScoreScorer; | ||
import org.apache.lucene.search.ConstantScoreWeight; | ||
import org.apache.lucene.search.IndexSearcher; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.Scorer; | ||
import org.apache.lucene.search.TwoPhaseIterator; | ||
import org.apache.lucene.search.Weight; | ||
import org.apache.lucene.store.ByteArrayDataInput; | ||
import org.apache.lucene.util.BytesRef; | ||
|
||
import java.io.IOException; | ||
import java.util.Objects; | ||
|
||
public final class BinaryDocValuesRangeQuery extends Query { | ||
|
||
private final String fieldName; | ||
private final QueryType queryType; | ||
private final BytesRef from; | ||
private final BytesRef to; | ||
private final Object originalFrom; | ||
private final Object originalTo; | ||
|
||
public BinaryDocValuesRangeQuery(String fieldName, QueryType queryType, BytesRef from, BytesRef to, | ||
Object originalFrom, Object originalTo) { | ||
this.fieldName = fieldName; | ||
this.queryType = queryType; | ||
this.from = from; | ||
this.to = to; | ||
this.originalFrom = originalFrom; | ||
this.originalTo = originalTo; | ||
} | ||
|
||
@Override | ||
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { | ||
return new ConstantScoreWeight(this, boost) { | ||
|
||
@Override | ||
public Scorer scorer(LeafReaderContext context) throws IOException { | ||
final BinaryDocValues values = context.reader().getBinaryDocValues(fieldName); | ||
if (values == null) { | ||
return null; | ||
} | ||
|
||
final TwoPhaseIterator iterator = new TwoPhaseIterator(values) { | ||
|
||
ByteArrayDataInput in = new ByteArrayDataInput(); | ||
BytesRef otherFrom = new BytesRef(16); | ||
BytesRef otherTo = new BytesRef(16); | ||
|
||
@Override | ||
public boolean matches() throws IOException { | ||
BytesRef encodedRanges = values.binaryValue(); | ||
in.reset(encodedRanges.bytes, encodedRanges.offset, encodedRanges.length); | ||
int numRanges = in.readVInt(); | ||
for (int i = 0; i < numRanges; i++) { | ||
otherFrom.length = in.readVInt(); | ||
otherFrom.bytes = encodedRanges.bytes; | ||
otherFrom.offset = in.getPosition(); | ||
in.skipBytes(otherFrom.length); | ||
|
||
otherTo.length = in.readVInt(); | ||
otherTo.bytes = encodedRanges.bytes; | ||
otherTo.offset = in.getPosition(); | ||
in.skipBytes(otherTo.length); | ||
|
||
if (queryType.matches(from, to, otherFrom, otherTo)) { | ||
return true; | ||
} | ||
} | ||
return false; | ||
} | ||
|
||
@Override | ||
public float matchCost() { | ||
return 4; // at most 4 comparisons | ||
} | ||
}; | ||
return new ConstantScoreScorer(this, score(), iterator); | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
public String toString(String field) { | ||
return "BinaryDocValuesRangeQuery(fieldName=" + field + ",from=" + originalFrom + ",to=" + originalTo + ")"; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) return true; | ||
if (o == null || getClass() != o.getClass()) return false; | ||
BinaryDocValuesRangeQuery that = (BinaryDocValuesRangeQuery) o; | ||
return Objects.equals(fieldName, that.fieldName) && | ||
queryType == that.queryType && | ||
Objects.equals(from, that.from) && | ||
Objects.equals(to, that.to); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(getClass(), fieldName, queryType, from, to); | ||
} | ||
|
||
public enum QueryType { | ||
INTERSECTS { | ||
@Override | ||
boolean matches(BytesRef from, BytesRef to, BytesRef otherFrom, BytesRef otherTo) { | ||
// part of the other range must touch this range | ||
// this: |---------------| | ||
// other: |------| | ||
return from.compareTo(otherTo) <= 0 && to.compareTo(otherFrom) >= 0; | ||
} | ||
}, WITHIN { | ||
@Override | ||
boolean matches(BytesRef from, BytesRef to, BytesRef otherFrom, BytesRef otherTo) { | ||
// other range must entirely lie within this range | ||
// this: |---------------| | ||
// other: |------| | ||
return from.compareTo(otherFrom) <= 0 && to.compareTo(otherTo) >= 0; | ||
} | ||
}, CONTAINS { | ||
@Override | ||
boolean matches(BytesRef from, BytesRef to, BytesRef otherFrom, BytesRef otherTo) { | ||
// this and other range must overlap | ||
// this: |------| | ||
// other: |---------------| | ||
return from.compareTo(otherFrom) >= 0 && to.compareTo(otherTo) <= 0; | ||
} | ||
}, CROSSES { | ||
@Override | ||
boolean matches(BytesRef from, BytesRef to, BytesRef otherFrom, BytesRef otherTo) { | ||
// does not disjoint AND not within: | ||
return (from.compareTo(otherTo) > 0 || to.compareTo(otherFrom) < 0) == false && | ||
(from.compareTo(otherFrom) <= 0 && to.compareTo(otherTo) >= 0) == false; | ||
} | ||
}; | ||
|
||
abstract boolean matches(BytesRef from, BytesRef to, BytesRef otherFrom, BytesRef otherTo); | ||
|
||
} | ||
|
||
} |
146 changes: 146 additions & 0 deletions
146
core/src/main/java/org/elasticsearch/index/mapper/BinaryRangeUtil.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.elasticsearch.index.mapper; | ||
|
||
import org.apache.lucene.store.ByteArrayDataOutput; | ||
import org.apache.lucene.util.BytesRef; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
import java.util.Set; | ||
|
||
enum BinaryRangeUtil { | ||
|
||
; | ||
|
||
static BytesRef encodeLongRanges(Set<RangeFieldMapper.Range> ranges) throws IOException { | ||
List<RangeFieldMapper.Range> sortedRanges = new ArrayList<>(ranges); | ||
sortedRanges.sort((r1, r2) -> { | ||
long r1From = ((Number) r1.from).longValue(); | ||
long r2From = ((Number) r2.from).longValue(); | ||
int cmp = Long.compare(r1From, r2From); | ||
if (cmp != 0) { | ||
return cmp; | ||
} else { | ||
long r1To = ((Number) r1.from).longValue(); | ||
long r2To = ((Number) r2.from).longValue(); | ||
return Long.compare(r1To, r2To); | ||
} | ||
}); | ||
|
||
final byte[] encoded = new byte[5 + ((5 + 9) * 2) * sortedRanges.size()]; | ||
ByteArrayDataOutput out = new ByteArrayDataOutput(encoded); | ||
out.writeVInt(sortedRanges.size()); | ||
for (RangeFieldMapper.Range range : sortedRanges) { | ||
byte[] encodedFrom = encode(((Number) range.from).longValue()); | ||
out.writeVInt(encodedFrom.length); | ||
out.writeBytes(encodedFrom, encodedFrom.length); | ||
byte[] encodedTo = encode(((Number) range.to).longValue()); | ||
out.writeVInt(encodedTo.length); | ||
out.writeBytes(encodedTo, encodedTo.length); | ||
} | ||
return new BytesRef(encoded, 0, out.getPosition()); | ||
} | ||
|
||
static BytesRef encodeDoubleRanges(Set<RangeFieldMapper.Range> ranges) throws IOException { | ||
List<RangeFieldMapper.Range> sortedRanges = new ArrayList<>(ranges); | ||
sortedRanges.sort((r1, r2) -> { | ||
double r1From = ((Number) r1.from).doubleValue(); | ||
double r2From = ((Number) r2.from).doubleValue(); | ||
int cmp = Double.compare(r1From, r2From); | ||
if (cmp != 0) { | ||
return cmp; | ||
} else { | ||
double r1To = ((Number) r1.from).doubleValue(); | ||
double r2To = ((Number) r2.from).doubleValue(); | ||
return Double.compare(r1To, r2To); | ||
} | ||
}); | ||
|
||
final byte[] encoded = new byte[5 + ((5 + 9) * 2) * sortedRanges.size()]; | ||
ByteArrayDataOutput out = new ByteArrayDataOutput(encoded); | ||
out.writeVInt(sortedRanges.size()); | ||
for (RangeFieldMapper.Range range : sortedRanges) { | ||
byte[] encodedFrom = BinaryRangeUtil.encode(((Number) range.from).doubleValue()); | ||
out.writeVInt(encodedFrom.length); | ||
out.writeBytes(encodedFrom, encodedFrom.length); | ||
byte[] encodedTo = BinaryRangeUtil.encode(((Number) range.to).doubleValue()); | ||
out.writeVInt(encodedTo.length); | ||
out.writeBytes(encodedTo, encodedTo.length); | ||
} | ||
return new BytesRef(encoded, 0, out.getPosition()); | ||
} | ||
|
||
/** | ||
* Encodes the specified number of type long in a variable-length byte format. | ||
* The byte format preserves ordering, which means the returned byte array can be used for comparing as is. | ||
*/ | ||
static byte[] encode(long number) { | ||
int sign = 1; // means positive | ||
if (number < 0) { | ||
number = -1 - number; | ||
sign = 0; | ||
} | ||
return encode(number, sign); | ||
} | ||
|
||
/** | ||
* Encodes the specified number of type double in a variable-length byte format. | ||
* The byte format preserves ordering, which means the returned byte array can be used for comparing as is. | ||
*/ | ||
static byte[] encode(double number) { | ||
long l; | ||
int sign; | ||
if (number < 0.0) { | ||
l = Double.doubleToRawLongBits(-0d - number); | ||
sign = 0; | ||
} else { | ||
l = Double.doubleToRawLongBits(number); | ||
sign = 1; // means positive | ||
} | ||
return encode(l, sign); | ||
} | ||
|
||
private static byte[] encode(long l, int sign) { | ||
assert l >= 0; | ||
int bits = 64 - Long.numberOfLeadingZeros(l); | ||
|
||
int numBytes = (bits + 7) / 8; // between 0 and 8 | ||
byte[] encoded = new byte[1 + numBytes]; | ||
// encode the sign first to make sure positive values compare greater than negative values | ||
// and then the number of bytes, to make sure that large values compare greater than low values | ||
if (sign > 0) { | ||
encoded[0] = (byte) ((sign << 4) | numBytes); | ||
} else { | ||
encoded[0] = (byte) ((sign << 4) | (8 - numBytes)); | ||
} | ||
for (int b = 0; b < numBytes; ++b) { | ||
if (sign == 1) { | ||
encoded[encoded.length - 1 - b] = (byte) (l >>> (8 * b)); | ||
} else if (sign == 0) { | ||
encoded[encoded.length - 1 - b] = (byte) (0xFF - ((l >>> (8 * b)) & 0xFF)); | ||
} else { | ||
throw new AssertionError(); | ||
} | ||
} | ||
return encoded; | ||
} | ||
|
||
} |
Oops, something went wrong.