Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Decode functions for range field binary encoded doc values #41206

Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.RangeFieldMapper;
import org.elasticsearch.index.mapper.RangeFieldMapper.RangeType;
import org.elasticsearch.index.mapper.RangeType;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.BoostingQueryBuilder;
import org.elasticsearch.index.query.ConstantScoreQueryBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.mapper.RangeType;

import java.io.IOException;
import java.util.Objects;
Expand All @@ -40,13 +41,13 @@ public final class BinaryDocValuesRangeQuery extends Query {

private final String fieldName;
private final QueryType queryType;
private final LengthType lengthType;
private final RangeType.LengthType lengthType;
private final BytesRef from;
private final BytesRef to;
private final Object originalFrom;
private final Object originalTo;

public BinaryDocValuesRangeQuery(String fieldName, QueryType queryType, LengthType lengthType,
public BinaryDocValuesRangeQuery(String fieldName, QueryType queryType, RangeType.LengthType lengthType,
BytesRef from, BytesRef to,
Object originalFrom, Object originalTo) {
this.fieldName = fieldName;
Expand Down Expand Up @@ -178,42 +179,4 @@ boolean matches(BytesRef from, BytesRef to, BytesRef otherFrom, BytesRef otherTo

}

/**
 * Strategies for determining how many bytes a single encoded value occupies.
 * The fixed strategies always report the same width; {@link #VARIABLE} reads
 * the width from the header byte of the value itself.
 */
public enum LengthType {
    FIXED_4(4),
    FIXED_8(8),
    FIXED_16(16),
    VARIABLE(-1) {
        @Override
        int readLength(byte[] bytes, int offset) {
            // Header layout: the top bit is the sign, the following 4 bits
            // hold the number of additional bytes after the header.
            final int header = bytes[offset] & 0xFF;
            final int lengthBits = (header >>> 3) & 0x0f;
            final boolean signBitSet = (header & 0x80) != 0;
            // When the sign bit is clear the 4 length bits are stored inverted.
            final int additionalBytes = signBitSet ? lengthBits : 0x0f - lengthBits;
            return 1 + additionalBytes;
        }
    };

    /** Width reported by the fixed strategies; not consulted by {@link #VARIABLE}. */
    private final int fixedWidth;

    LengthType(int fixedWidth) {
        this.fixedWidth = fixedWidth;
    }

    /**
     * Return the length of the value that starts at {@code offset} in {@code bytes}.
     */
    int readLength(byte[] bytes, int offset) {
        return fixedWidth;
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.TriFunction;

import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Set;
Expand All @@ -33,6 +38,32 @@ enum BinaryRangeUtil {

;

/**
 * Serializes the given IP ranges into one binary value: a vInt holding the
 * number of ranges, followed by the encoded {@code from} and {@code to}
 * address of each range, in the iteration order of {@code ranges}.
 */
static BytesRef encodeIPRanges(Set<RangeFieldMapper.Range> ranges) throws IOException {
    final int rangeCount = ranges.size();
    // Buffer budget: 5 bytes up front (presumably headroom for the vInt count)
    // plus two 16-byte slots per range.
    final byte[] buffer = new byte[5 + (16 * 2) * rangeCount];
    final ByteArrayDataOutput writer = new ByteArrayDataOutput(buffer);
    writer.writeVInt(rangeCount);
    for (RangeFieldMapper.Range current : ranges) {
        final byte[] lowerBytes = InetAddressPoint.encode((InetAddress) current.from);
        writer.writeBytes(lowerBytes, 0, lowerBytes.length);

        final byte[] upperBytes = InetAddressPoint.encode((InetAddress) current.to);
        writer.writeBytes(upperBytes, 0, upperBytes.length);
    }
    // Only the bytes actually written are exposed through the returned reference.
    return new BytesRef(buffer, 0, writer.getPosition());
}

/**
 * Decodes a binary value holding IP ranges by delegating to
 * {@code decodeRanges} with {@code RangeType.IP} and the IP value decoder.
 */
static List<RangeFieldMapper.Range> decodeIPRanges(BytesRef encodedRanges) {
    final TriFunction<byte[], Integer, Integer, Object> addressDecoder = BinaryRangeUtil::decodeIP;
    return decodeRanges(encodedRanges, RangeType.IP, addressDecoder);
}

/**
 * Decodes the address stored in {@code length} bytes of {@code bytes},
 * starting at {@code offset}.
 */
private static InetAddress decodeIP(byte[] bytes, int offset, int length) {
    // copyOfRange takes a [from, to) pair rather than an offset and a length
    final int end = offset + length;
    return InetAddressPoint.decode(Arrays.copyOfRange(bytes, offset, end));
}

static BytesRef encodeLongRanges(Set<RangeFieldMapper.Range> ranges) throws IOException {
List<RangeFieldMapper.Range> sortedRanges = new ArrayList<>(ranges);
Comparator<RangeFieldMapper.Range> fromComparator = Comparator.comparingLong(range -> ((Number) range.from).longValue());
Expand All @@ -51,6 +82,11 @@ static BytesRef encodeLongRanges(Set<RangeFieldMapper.Range> ranges) throws IOEx
return new BytesRef(encoded, 0, out.getPosition());
}

/**
 * Decodes a binary value holding long ranges by delegating to
 * {@code decodeRanges} with {@code RangeType.LONG} and the long value decoder.
 */
static List<RangeFieldMapper.Range> decodeLongRanges(BytesRef encodedRanges) {
    final TriFunction<byte[], Integer, Integer, Object> longDecoder = BinaryRangeUtil::decodeLong;
    return decodeRanges(encodedRanges, RangeType.LONG, longDecoder);
}

static BytesRef encodeDoubleRanges(Set<RangeFieldMapper.Range> ranges) throws IOException {
List<RangeFieldMapper.Range> sortedRanges = new ArrayList<>(ranges);
Comparator<RangeFieldMapper.Range> fromComparator = Comparator.comparingDouble(range -> ((Number) range.from).doubleValue());
Expand All @@ -69,6 +105,43 @@ static BytesRef encodeDoubleRanges(Set<RangeFieldMapper.Range> ranges) throws IO
return new BytesRef(encoded, 0, out.getPosition());
}

/**
 * Decodes a binary value holding double ranges by delegating to
 * {@code decodeRanges} with {@code RangeType.DOUBLE} and the double value decoder.
 */
static List<RangeFieldMapper.Range> decodeDoubleRanges(BytesRef encodedRanges) {
    final TriFunction<byte[], Integer, Integer, Object> doubleDecoder = BinaryRangeUtil::decodeDouble;
    return decodeRanges(encodedRanges, RangeType.DOUBLE, doubleDecoder);
}

/**
 * Decodes a binary value holding float ranges by delegating to
 * {@code decodeRanges} with {@code RangeType.FLOAT} and the float value decoder.
 */
static List<RangeFieldMapper.Range> decodeFloatRanges(BytesRef encodedRanges) {
    final TriFunction<byte[], Integer, Integer, Object> floatDecoder = BinaryRangeUtil::decodeFloat;
    return decodeRanges(encodedRanges, RangeType.FLOAT, floatDecoder);
}

/**
 * Decodes a binary value into a list of ranges.
 * <p>
 * The value is read as a vInt range count followed by, for each range, the
 * encoded {@code from} value and then the encoded {@code to} value. The width
 * of every value is obtained from the {@code lengthType} of {@code rangeType}.
 *
 * @param encodedRanges the binary value to decode
 * @param rangeType     the type used for value widths and for the resulting ranges
 * @param decodeBytes   turns {@code (bytes, offset, length)} into a single range bound
 * @return the decoded ranges, in the order they appear in {@code encodedRanges}
 */
static List<RangeFieldMapper.Range> decodeRanges(BytesRef encodedRanges, RangeType rangeType,
                                                 TriFunction<byte[], Integer, Integer, Object> decodeBytes) {

    final RangeType.LengthType widthReader = rangeType.lengthType;

    // Read the leading range count; the reader's position afterwards is where
    // the first encoded value begins.
    final ByteArrayDataInput header = new ByteArrayDataInput();
    header.reset(encodedRanges.bytes, encodedRanges.offset, encodedRanges.length);
    final int rangeCount = header.readVInt();

    final List<RangeFieldMapper.Range> decoded = new ArrayList<>(rangeCount);

    final byte[] buffer = encodedRanges.bytes;
    int cursor = header.getPosition();
    for (int remaining = rangeCount; remaining > 0; remaining--) {
        final int fromLength = widthReader.readLength(buffer, cursor);
        final Object lower = decodeBytes.apply(buffer, cursor, fromLength);
        cursor += fromLength;

        final int toLength = widthReader.readLength(buffer, cursor);
        final Object upper = decodeBytes.apply(buffer, cursor, toLength);
        cursor += toLength;

        // TODO: Support for exclusive ranges, pending resolution of #40601
        decoded.add(new RangeFieldMapper.Range(rangeType, lower, upper, true, true));
    }
    return decoded;
}

static BytesRef encodeFloatRanges(Set<RangeFieldMapper.Range> ranges) throws IOException {
List<RangeFieldMapper.Range> sortedRanges = new ArrayList<>(ranges);
Comparator<RangeFieldMapper.Range> fromComparator = Comparator.comparingDouble(range -> ((Number) range.from).floatValue());
Expand All @@ -93,12 +166,20 @@ static byte[] encodeDouble(double number) {
return encoded;
}

/**
 * Decodes a double from the sortable bytes that start at {@code offset} in
 * {@code bytes}. The {@code length} argument is not used by this method.
 */
static double decodeDouble(byte[] bytes, int offset, int length) {
    final long sortableBits = NumericUtils.sortableBytesToLong(bytes, offset);
    return NumericUtils.sortableLongToDouble(sortableBits);
}

/**
 * Encodes {@code number} into a new 4-byte array holding its sortable-int
 * representation as sortable bytes.
 */
static byte[] encodeFloat(float number) {
    final int sortableBits = NumericUtils.floatToSortableInt(number);
    final byte[] target = new byte[4];
    NumericUtils.intToSortableBytes(sortableBits, target, 0);
    return target;
}

/**
 * Decodes a float from the sortable bytes that start at {@code offset} in
 * {@code bytes}. The {@code length} argument is not used by this method.
 */
static float decodeFloat(byte[] bytes, int offset, int length) {
    final int sortableBits = NumericUtils.sortableBytesToInt(bytes, offset);
    return NumericUtils.sortableIntToFloat(sortableBits);
}

/**
* Encodes the specified number of type long in a variable-length byte format.
* The byte format preserves ordering, which means the returned byte array can be used for comparing as is.
Expand All @@ -114,6 +195,23 @@ static byte[] encodeLong(long number) {
return encode(number, sign);
}

/**
 * Decodes a long from {@code length} bytes of {@code bytes}, starting at
 * {@code offset}.
 * <p>
 * The first byte is a header: a cleared top bit marks a negative number, and
 * the lowest three bits are the most significant bits of the value. Every
 * following byte contributes eight more bits, most significant first.
 *
 * @param bytes  the array holding the encoded value
 * @param offset index of the header byte
 * @param length total number of bytes to consume, header included
 * @return the decoded value
 */
static long decodeLong(byte[] bytes, int offset, int length) {
    final byte header = bytes[offset];
    final boolean negative = (header & 128) == 0;

    // Seed the result with the low three bits of the header. For a negative
    // number every higher bit is set to one (-8 is ...11111000) so the sign
    // survives the shifts below.
    long value = negative ? (-8 | header) : (header & 7);

    int position = offset;
    for (int consumed = 1; consumed < length; consumed++) {
        position++;
        value = (value << 8) + Byte.toUnsignedInt(bytes[position]);
    }
    return value;
}

private static byte[] encode(long l, int sign) {
assert l >= 0;

Expand Down Expand Up @@ -158,4 +256,5 @@ private static byte[] encode(long l, int sign) {
}
return encoded;
}

}
Loading