Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Decode functions for range field binary encoded doc values #41206

Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.lucene.search.Weight;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.mapper.BinaryRangeUtil;

import java.io.IOException;
import java.util.Objects;
Expand All @@ -40,13 +41,13 @@ public final class BinaryDocValuesRangeQuery extends Query {

private final String fieldName;
private final QueryType queryType;
private final LengthType lengthType;
private final BinaryRangeUtil.LengthType lengthType;
private final BytesRef from;
private final BytesRef to;
private final Object originalFrom;
private final Object originalTo;

public BinaryDocValuesRangeQuery(String fieldName, QueryType queryType, LengthType lengthType,
public BinaryDocValuesRangeQuery(String fieldName, QueryType queryType, BinaryRangeUtil.LengthType lengthType,
BytesRef from, BytesRef to,
Object originalFrom, Object originalTo) {
this.fieldName = fieldName;
Expand Down Expand Up @@ -178,42 +179,4 @@ boolean matches(BytesRef from, BytesRef to, BytesRef otherFrom, BytesRef otherTo

}

/**
 * Strategies for working out how many bytes a single encoded value occupies
 * inside a byte array.
 */
public enum LengthType {
    /** Every value is exactly 4 bytes long; the input is not inspected. */
    FIXED_4 {
        @Override
        int readLength(byte[] bytes, int offset) {
            return Integer.BYTES;
        }
    },
    /** Every value is exactly 8 bytes long; the input is not inspected. */
    FIXED_8 {
        @Override
        int readLength(byte[] bytes, int offset) {
            return Long.BYTES;
        }
    },
    /** Every value is exactly 16 bytes long; the input is not inspected. */
    FIXED_16 {
        @Override
        int readLength(byte[] bytes, int offset) {
            return 2 * Long.BYTES;
        }
    },
    /** The length is read from the header byte found at {@code offset}. */
    VARIABLE {
        @Override
        int readLength(byte[] bytes, int offset) {
            // Header layout: the top bit carries the sign, the following 4 bits
            // carry the count of additional bytes.
            final int header = bytes[offset] & 0xff;
            final int lengthBits = (header >>> 3) & 0x0f;
            if ((header & 0x80) != 0) {
                return lengthBits + 1;
            }
            // When the sign bit is clear the stored count is 15 minus the real count.
            return (0x0f - lengthBits) + 1;
        }
    };

    /**
     * Return the length of the value that starts at {@code offset} in {@code bytes}.
     */
    abstract int readLength(byte[] bytes, int offset);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,51 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.common.TriFunction;

import java.io.IOException;
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Set;

enum BinaryRangeUtil {
public enum BinaryRangeUtil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we extract LengthType into its own file and leave this class package-private? The encoding should remain internal.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, I feel really bad about making LengthType a top level enum; it's very much an implementation detail of the range encoding. The more I thought about it, the more I came to feel it really should be part of RangeType, and my only objection to putting it there in the first place was that RangeFieldMapper is already 1000 lines and defines half a dozen classes. So I made RangeType a top level enum and put LengthType under that. RangeType needs to be public anyway, so there's no increased API surface with this arrangement.

This seems like the most natural refactoring to me, since LengthType is a direct function of RangeType, but I'm open to rolling that back and just making LengthType a top-level enum if you feel strongly that's the right way to do this. Thanks for the feedback!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok fine with me, thanks for explaining


;

/**
 * Serializes a set of IP ranges into a single {@link BytesRef}: a vInt holding
 * the number of ranges, followed by the encoded {@code from} and {@code to}
 * address of every range in the set's iteration order.
 *
 * @param ranges the ranges to encode; {@code from} and {@code to} are cast to {@link InetAddress}
 * @return a reference to the written prefix of the backing buffer
 * @throws IOException declared by the signature; kept for callers
 */
static BytesRef encodeIPRanges(Set<RangeFieldMapper.Range> ranges) throws IOException {
    final int rangeCount = ranges.size();
    // Presumably 5 bytes for the worst-case vInt plus two 16-byte addresses per range.
    final byte[] buffer = new byte[5 + (16 * 2) * rangeCount];
    final ByteArrayDataOutput out = new ByteArrayDataOutput(buffer);
    out.writeVInt(rangeCount);
    for (RangeFieldMapper.Range range : ranges) {
        final byte[] lower = InetAddressPoint.encode((InetAddress) range.from);
        out.writeBytes(lower, 0, lower.length);

        final byte[] upper = InetAddressPoint.encode((InetAddress) range.to);
        out.writeBytes(upper, 0, upper.length);
    }
    // Only the bytes actually written are exposed.
    return new BytesRef(buffer, 0, out.getPosition());
}

/**
 * Decodes IP ranges by delegating to {@code decodeRanges} with the IP range
 * type and {@code decodeIP} as the per-value decoder.
 */
static List<RangeFieldMapper.Range> decodeIPRanges(BytesRef encodedRanges) {
    final TriFunction<byte[], Integer, Integer, Object> ipDecoder = BinaryRangeUtil::decodeIP;
    return decodeRanges(encodedRanges, RangeFieldMapper.RangeType.IP, ipDecoder);
}

/**
 * Decodes the address stored in {@code length} bytes of {@code bytes},
 * starting at {@code offset}.
 */
private static InetAddress decodeIP(byte[] bytes, int offset, int length) {
    // copyOfRange takes an exclusive end index rather than a length.
    final int end = offset + length;
    return InetAddressPoint.decode(Arrays.copyOfRange(bytes, offset, end));
}

static BytesRef encodeLongRanges(Set<RangeFieldMapper.Range> ranges) throws IOException {
List<RangeFieldMapper.Range> sortedRanges = new ArrayList<>(ranges);
Comparator<RangeFieldMapper.Range> fromComparator = Comparator.comparingLong(range -> ((Number) range.from).longValue());
Expand All @@ -51,6 +82,11 @@ static BytesRef encodeLongRanges(Set<RangeFieldMapper.Range> ranges) throws IOEx
return new BytesRef(encoded, 0, out.getPosition());
}

/**
 * Decodes long ranges by delegating to {@code decodeRanges} with the LONG
 * range type and {@code decodeLong} as the per-value decoder.
 */
static List<RangeFieldMapper.Range> decodeLongRanges(BytesRef encodedRanges) {
    final TriFunction<byte[], Integer, Integer, Object> longDecoder = BinaryRangeUtil::decodeLong;
    return decodeRanges(encodedRanges, RangeFieldMapper.RangeType.LONG, longDecoder);
}

static BytesRef encodeDoubleRanges(Set<RangeFieldMapper.Range> ranges) throws IOException {
List<RangeFieldMapper.Range> sortedRanges = new ArrayList<>(ranges);
Comparator<RangeFieldMapper.Range> fromComparator = Comparator.comparingDouble(range -> ((Number) range.from).doubleValue());
Expand All @@ -69,6 +105,43 @@ static BytesRef encodeDoubleRanges(Set<RangeFieldMapper.Range> ranges) throws IO
return new BytesRef(encoded, 0, out.getPosition());
}

/**
 * Decodes double ranges by delegating to {@code decodeRanges} with the DOUBLE
 * range type and {@code decodeDouble} as the per-value decoder.
 */
static List<RangeFieldMapper.Range> decodeDoubleRanges(BytesRef encodedRanges) {
    final TriFunction<byte[], Integer, Integer, Object> doubleDecoder = BinaryRangeUtil::decodeDouble;
    return decodeRanges(encodedRanges, RangeFieldMapper.RangeType.DOUBLE, doubleDecoder);
}

/**
 * Decodes float ranges by delegating to {@code decodeRanges} with the FLOAT
 * range type and {@code decodeFloat} as the per-value decoder.
 */
static List<RangeFieldMapper.Range> decodeFloatRanges(BytesRef encodedRanges) {
    final TriFunction<byte[], Integer, Integer, Object> floatDecoder = BinaryRangeUtil::decodeFloat;
    return decodeRanges(encodedRanges, RangeFieldMapper.RangeType.FLOAT, floatDecoder);
}

/**
 * Decodes a sequence of ranges from their binary form.
 * <p>
 * The input starts with a vInt holding the number of ranges. Each range then
 * consists of an encoded {@code from} value immediately followed by an encoded
 * {@code to} value. The size of every value is obtained from the
 * {@code lengthType} of {@code rangeType}.
 *
 * @param encodedRanges the encoded bytes, honouring the reference's offset and length
 * @param rangeType     the range type; supplies the length strategy and is passed to each decoded range
 * @param decodeBytes   turns {@code (bytes, offset, length)} into a single bound value
 * @return the decoded ranges, in the order they appear in the input
 */
static List<RangeFieldMapper.Range> decodeRanges(BytesRef encodedRanges, RangeFieldMapper.RangeType rangeType,
                                                 TriFunction<byte[], Integer, Integer, Object> decodeBytes) {

    final LengthType lengths = rangeType.lengthType;

    // Read the leading count; afterwards the reader is only used to learn where the payload starts.
    final ByteArrayDataInput header = new ByteArrayDataInput();
    header.reset(encodedRanges.bytes, encodedRanges.offset, encodedRanges.length);
    final int count = header.readVInt();

    final List<RangeFieldMapper.Range> decoded = new ArrayList<>(count);

    final byte[] data = encodedRanges.bytes;
    int cursor = header.getPosition();
    int remaining = count;
    while (remaining > 0) {
        final int fromLength = lengths.readLength(data, cursor);
        final Object lower = decodeBytes.apply(data, cursor, fromLength);
        cursor += fromLength;

        final int toLength = lengths.readLength(data, cursor);
        final Object upper = decodeBytes.apply(data, cursor, toLength);
        cursor += toLength;

        // TODO: Support for exclusive ranges, pending resolution of #40601
        // Until then both trailing flags are passed as true.
        decoded.add(new RangeFieldMapper.Range(rangeType, lower, upper, true, true));
        remaining--;
    }
    return decoded;
}

static BytesRef encodeFloatRanges(Set<RangeFieldMapper.Range> ranges) throws IOException {
List<RangeFieldMapper.Range> sortedRanges = new ArrayList<>(ranges);
Comparator<RangeFieldMapper.Range> fromComparator = Comparator.comparingDouble(range -> ((Number) range.from).floatValue());
Expand All @@ -93,12 +166,20 @@ static byte[] encodeDouble(double number) {
return encoded;
}

/**
 * Reads a sortable long from {@code bytes} at {@code offset} and converts it
 * to a double.
 *
 * @param length not used by this method; present so the signature matches the
 *               {@code (bytes, offset, length)} decoder shape
 */
static double decodeDouble(byte[] bytes, int offset, int length) {
    final long sortableBits = NumericUtils.sortableBytesToLong(bytes, offset);
    return NumericUtils.sortableLongToDouble(sortableBits);
}

/**
 * Encodes a float as 4 bytes: the value is first converted to a sortable int,
 * which is then written as sortable bytes.
 */
static byte[] encodeFloat(float number) {
    final int sortableBits = NumericUtils.floatToSortableInt(number);
    final byte[] result = new byte[Float.BYTES];
    NumericUtils.intToSortableBytes(sortableBits, result, 0);
    return result;
}

/**
 * Reads a sortable int from {@code bytes} at {@code offset} and converts it
 * to a float.
 *
 * @param length not used by this method; present so the signature matches the
 *               {@code (bytes, offset, length)} decoder shape
 */
static float decodeFloat(byte[] bytes, int offset, int length) {
    final int sortableBits = NumericUtils.sortableBytesToInt(bytes, offset);
    return NumericUtils.sortableIntToFloat(sortableBits);
}

/**
* Encodes the specified number of type long in a variable-length byte format.
* The byte format preserves ordering, which means the returned byte array can be used for comparing as is.
Expand All @@ -114,6 +195,23 @@ static byte[] encodeLong(long number) {
return encode(number, sign);
}

/**
 * Decodes a long from its variable-length form.
 * <p>
 * The top bit of the first byte is the sign flag (set means non-negative) and
 * its lowest three bits are the most significant bits of the value. The bits
 * in between are not part of the value. The remaining {@code length - 1}
 * bytes are appended big-endian.
 *
 * @param bytes  array containing the encoded value
 * @param offset index of the first (header) byte
 * @param length total number of bytes of the encoded value, header included
 * @return the decoded number
 */
static long decodeLong(byte[] bytes, int offset, int length) {
    final byte header = bytes[offset];
    final boolean negative = (header & 0x80) == 0;

    // Keep only the low three bits of the header. For negatives every higher
    // bit is forced to 1 so that the value stays sign-extended while shifting.
    long value = negative ? (-8 | header) : (header & 0x07);

    int i = 1;
    while (i < length) {
        // After the shift the low byte is zero, so OR-ing in the next byte appends it.
        value = (value << 8) | (bytes[offset + i] & 0xFFL);
        i++;
    }
    return value;
}

private static byte[] encode(long l, int sign) {
assert l >= 0;

Expand Down Expand Up @@ -158,4 +256,43 @@ private static byte[] encode(long l, int sign) {
}
return encoded;
}

/**
 * Strategies for determining the number of bytes taken up by one encoded
 * value inside a byte array.
 */
public enum LengthType {
    /** Values always span 4 bytes; the array contents are ignored. */
    FIXED_4 {
        @Override
        public int readLength(byte[] bytes, int offset) {
            return Integer.BYTES;
        }
    },
    /** Values always span 8 bytes; the array contents are ignored. */
    FIXED_8 {
        @Override
        public int readLength(byte[] bytes, int offset) {
            return Long.BYTES;
        }
    },
    /** Values always span 16 bytes; the array contents are ignored. */
    FIXED_16 {
        @Override
        public int readLength(byte[] bytes, int offset) {
            return 2 * Long.BYTES;
        }
    },
    /** The span is derived from the header byte located at {@code offset}. */
    VARIABLE {
        @Override
        public int readLength(byte[] bytes, int offset) {
            // Header byte: the first bit is the sign, the next 4 bits hold the
            // number of additional bytes.
            final int header = bytes[offset] & 0xFF;
            final boolean signBitSet = (header & 0x80) != 0;
            final int lengthBits = (header >>> 3) & 0x0F;
            // With the sign bit clear, the stored count is 15 minus the real count.
            final int additionalBytes = signBitSet ? lengthBits : 0x0F - lengthBits;
            return additionalBytes + 1;
        }
    };

    /**
     * Return the length of the value that starts at {@code offset} in {@code bytes}.
     */
    public abstract int readLength(byte[] bytes, int offset);
}
}
Loading