Query range fields by doc values when they are expected to be more ef…

…ficient than points. * Enable doc values for range fields by default. * Store ranges in a binary format that supports multi-valued fields. * Added BinaryDocValuesRangeQuery, which can query ranges that have been encoded into a binary doc values field. * Wrap range queries on a range field in an IndexOrDocValuesQuery. Closes #24314
elastic · Jul 12, 2017 · 0a25558 · 0a25558
1 parent ad01a67
commit 0a25558
Show file tree

Hide file tree

Showing 13 changed files with 1,679 additions and 62 deletions.
diff --git a/core/src/main/java/org/apache/lucene/queries/BinaryDocValuesRangeQuery.java b/core/src/main/java/org/apache/lucene/queries/BinaryDocValuesRangeQuery.java
@@ -0,0 +1,164 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.lucene.queries;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.ConstantScoreScorer;
+import org.apache.lucene.search.ConstantScoreWeight;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
+import org.apache.lucene.search.Weight;
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.Objects;
+
+/**
+ * A constant-score query that matches documents by comparing a query range against the
+ * ranges stored in a binary doc values field.
+ * <p>
+ * The doc value of each document is read as: a vInt holding the number of ranges, followed,
+ * for every range, by a vInt length plus that many bytes for the lower bound and then a vInt
+ * length plus that many bytes for the upper bound. Bounds are compared with
+ * {@link BytesRef#compareTo(BytesRef)}, so the encoding of the bounds must preserve ordering.
+ * A document matches as soon as one of its ranges satisfies the {@link QueryType}.
+ */
+public final class BinaryDocValuesRangeQuery extends Query {
+
+    private final String fieldName;
+    private final QueryType queryType;
+    private final BytesRef from;
+    private final BytesRef to;
+    // Only used for rendering the query in toString(...); not part of equals/hashCode.
+    private final Object originalFrom;
+    private final Object originalTo;
+
+    /**
+     * @param fieldName    name of the binary doc values field holding the encoded ranges
+     * @param queryType    relation that must hold between the query range and a stored range
+     * @param from         encoded lower bound of the query range
+     * @param to           encoded upper bound of the query range
+     * @param originalFrom unencoded lower bound, only used by {@link #toString(String)}
+     * @param originalTo   unencoded upper bound, only used by {@link #toString(String)}
+     */
+    public BinaryDocValuesRangeQuery(String fieldName, QueryType queryType, BytesRef from, BytesRef to,
+                                     Object originalFrom, Object originalTo) {
+        this.fieldName = fieldName;
+        this.queryType = queryType;
+        this.from = from;
+        this.to = to;
+        this.originalFrom = originalFrom;
+        this.originalTo = originalTo;
+    }
+
+    @Override
+    public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
+        return new ConstantScoreWeight(this, boost) {
+
+            @Override
+            public Scorer scorer(LeafReaderContext context) throws IOException {
+                final BinaryDocValues values = context.reader().getBinaryDocValues(fieldName);
+                if (values == null) {
+                    // no doc values for this field in this segment, nothing can match
+                    return null;
+                }
+
+                final TwoPhaseIterator iterator = new TwoPhaseIterator(values) {
+
+                    // Scratch objects reused for every document. The two BytesRef instances never
+                    // own bytes, they are re-pointed at slices of the current doc value.
+                    private final ByteArrayDataInput in = new ByteArrayDataInput();
+                    private final BytesRef otherFrom = new BytesRef();
+                    private final BytesRef otherTo = new BytesRef();
+
+                    @Override
+                    public boolean matches() throws IOException {
+                        BytesRef encodedRanges = values.binaryValue();
+                        in.reset(encodedRanges.bytes, encodedRanges.offset, encodedRanges.length);
+                        int numRanges = in.readVInt();
+                        for (int i = 0; i < numRanges; i++) {
+                            // lower bound: vInt length followed by the bytes
+                            otherFrom.length = in.readVInt();
+                            otherFrom.bytes = encodedRanges.bytes;
+                            otherFrom.offset = in.getPosition();
+                            in.skipBytes(otherFrom.length);
+
+                            // upper bound: same layout
+                            otherTo.length = in.readVInt();
+                            otherTo.bytes = encodedRanges.bytes;
+                            otherTo.offset = in.getPosition();
+                            in.skipBytes(otherTo.length);
+
+                            if (queryType.matches(from, to, otherFrom, otherTo)) {
+                                return true;
+                            }
+                        }
+                        return false;
+                    }
+
+                    @Override
+                    public float matchCost() {
+                        // at most 4 comparisons per encoded range; the number of ranges per
+                        // document is not known up front, so this is the cost for a single range
+                        return 4;
+                    }
+                };
+                return new ConstantScoreScorer(this, score(), iterator);
+            }
+        };
+    }
+
+    @Override
+    public String toString(String field) {
+        // The argument is the default field of the enclosing query, not the field this query
+        // runs on, so always print our own field name.
+        return "BinaryDocValuesRangeQuery(fieldName=" + fieldName + ",from=" + originalFrom + ",to=" + originalTo + ")";
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        BinaryDocValuesRangeQuery that = (BinaryDocValuesRangeQuery) o;
+        return Objects.equals(fieldName, that.fieldName) &&
+                queryType == that.queryType &&
+                Objects.equals(from, that.from) &&
+                Objects.equals(to, that.to);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(getClass(), fieldName, queryType, from, to);
+    }
+
+    /**
+     * The relation that must hold between the query range ("this": {@code from}/{@code to})
+     * and a stored range ("other": {@code otherFrom}/{@code otherTo}). All bounds are inclusive.
+     */
+    public enum QueryType {
+        INTERSECTS {
+            @Override
+            boolean matches(BytesRef from, BytesRef to, BytesRef otherFrom, BytesRef otherTo) {
+                // part of the other range must touch this range
+                // this:    |---------------|
+                // other:               |------|
+                return from.compareTo(otherTo) <= 0 && to.compareTo(otherFrom) >= 0;
+            }
+        }, WITHIN {
+            @Override
+            boolean matches(BytesRef from, BytesRef to, BytesRef otherFrom, BytesRef otherTo) {
+                // other range must entirely lie within this range
+                // this:    |---------------|
+                // other:       |------|
+                return from.compareTo(otherFrom) <= 0 && to.compareTo(otherTo) >= 0;
+            }
+        }, CONTAINS {
+            @Override
+            boolean matches(BytesRef from, BytesRef to, BytesRef otherFrom, BytesRef otherTo) {
+                // this range must entirely lie within the other range
+                // this:       |------|
+                // other:    |---------------|
+                return from.compareTo(otherFrom) >= 0 && to.compareTo(otherTo) <= 0;
+            }
+        }, CROSSES {
+            @Override
+            boolean matches(BytesRef from, BytesRef to, BytesRef otherFrom, BytesRef otherTo) {
+                // the ranges are not disjoint AND the other range does not lie within this range
+                boolean disjoint = from.compareTo(otherTo) > 0 || to.compareTo(otherFrom) < 0;
+                boolean otherWithinThis = from.compareTo(otherFrom) <= 0 && to.compareTo(otherTo) >= 0;
+                return disjoint == false && otherWithinThis == false;
+            }
+        };
+
+        /**
+         * @return whether the stored range {@code [otherFrom, otherTo]} relates to the query
+         *         range {@code [from, to]} in the way this type requires
+         */
+        abstract boolean matches(BytesRef from, BytesRef to, BytesRef otherFrom, BytesRef otherTo);
+
+    }
+
+}
diff --git a/core/src/main/java/org/elasticsearch/index/mapper/BinaryRangeUtil.java b/core/src/main/java/org/elasticsearch/index/mapper/BinaryRangeUtil.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.elasticsearch.index.mapper;
+
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Static helpers that encode sets of ranges into a single binary value.
+ * <p>
+ * Layout of an encoded set: a vInt with the number of ranges, followed, for every range, by
+ * the lower bound and then the upper bound, each written as a vInt length plus the
+ * order-preserving bytes produced by {@link #encode(long)} or {@link #encode(double)}.
+ */
+enum BinaryRangeUtil {
+
+    ;
+
+    /**
+     * Encodes ranges whose bounds are read via {@link Number#longValue()}.
+     * Ranges are written sorted by lower bound and then by upper bound.
+     */
+    static BytesRef encodeLongRanges(Set<RangeFieldMapper.Range> ranges) throws IOException {
+        List<RangeFieldMapper.Range> sortedRanges = new ArrayList<>(ranges);
+        sortedRanges.sort((r1, r2) -> {
+            int cmp = Long.compare(((Number) r1.from).longValue(), ((Number) r2.from).longValue());
+            if (cmp != 0) {
+                return cmp;
+            }
+            // equal lower bounds: order by upper bound
+            return Long.compare(((Number) r1.to).longValue(), ((Number) r2.to).longValue());
+        });
+
+        final byte[] encoded = newBuffer(sortedRanges.size());
+        ByteArrayDataOutput out = new ByteArrayDataOutput(encoded);
+        out.writeVInt(sortedRanges.size());
+        for (RangeFieldMapper.Range range : sortedRanges) {
+            writeWithLength(out, encode(((Number) range.from).longValue()));
+            writeWithLength(out, encode(((Number) range.to).longValue()));
+        }
+        return new BytesRef(encoded, 0, out.getPosition());
+    }
+
+    /**
+     * Encodes ranges whose bounds are read via {@link Number#doubleValue()}.
+     * Ranges are written sorted by lower bound and then by upper bound.
+     */
+    static BytesRef encodeDoubleRanges(Set<RangeFieldMapper.Range> ranges) throws IOException {
+        List<RangeFieldMapper.Range> sortedRanges = new ArrayList<>(ranges);
+        sortedRanges.sort((r1, r2) -> {
+            int cmp = Double.compare(((Number) r1.from).doubleValue(), ((Number) r2.from).doubleValue());
+            if (cmp != 0) {
+                return cmp;
+            }
+            // equal lower bounds: order by upper bound
+            return Double.compare(((Number) r1.to).doubleValue(), ((Number) r2.to).doubleValue());
+        });
+
+        final byte[] encoded = newBuffer(sortedRanges.size());
+        ByteArrayDataOutput out = new ByteArrayDataOutput(encoded);
+        out.writeVInt(sortedRanges.size());
+        for (RangeFieldMapper.Range range : sortedRanges) {
+            writeWithLength(out, encode(((Number) range.from).doubleValue()));
+            writeWithLength(out, encode(((Number) range.to).doubleValue()));
+        }
+        return new BytesRef(encoded, 0, out.getPosition());
+    }
+
+    /**
+     * Allocates a buffer large enough for {@code numRanges} encoded ranges: up to 5 bytes for
+     * the vInt range count, and per bound up to 5 bytes for the vInt length plus up to 9 bytes
+     * (1 header byte + 8 value bytes) for the encoded number.
+     */
+    private static byte[] newBuffer(int numRanges) {
+        return new byte[5 + ((5 + 9) * 2) * numRanges];
+    }
+
+    /** Writes {@code value} prefixed with its length as a vInt. */
+    private static void writeWithLength(ByteArrayDataOutput out, byte[] value) throws IOException {
+        out.writeVInt(value.length);
+        out.writeBytes(value, value.length);
+    }
+
+    /**
+     * Encodes the specified number of type long in a variable-length byte format.
+     * The byte format preserves ordering, which means the returned byte array can be used for comparing as is.
+     */
+    static byte[] encode(long number) {
+        if (number < 0) {
+            // map -1 -> 0, -2 -> 1, ..., Long.MIN_VALUE -> Long.MAX_VALUE so the magnitude is non-negative
+            return encode(-1 - number, 0);
+        }
+        return encode(number, 1);
+    }
+
+    /**
+     * Encodes the specified number of type double in a variable-length byte format.
+     * The byte format preserves ordering, which means the returned byte array can be used for comparing as is.
+     * <p>
+     * The sign is taken from the sign bit rather than from a {@code < 0.0} comparison so that
+     * {@code -0.0} is encoded as a negative value (sorting directly below {@code +0.0}) instead
+     * of being passed on with its sign bit set. NaN is canonicalized by
+     * {@link Double#doubleToLongBits(double)} and encodes as a positive value.
+     */
+    static byte[] encode(double number) {
+        long bits = Double.doubleToLongBits(number);
+        if (bits < 0) {
+            // sign bit set: clear it to get the bits of the magnitude
+            return encode(bits & Long.MAX_VALUE, 0);
+        }
+        return encode(bits, 1);
+    }
+
+    /**
+     * Encodes a non-negative magnitude together with a sign flag.
+     * <p>
+     * The first byte holds the sign in bit 4 and a byte count in the low bits, the remaining
+     * bytes hold the significant bytes of {@code l} in big-endian order. For negative values
+     * ({@code sign == 0}) the count is stored as {@code 8 - numBytes} and every value byte is
+     * complemented, so that larger magnitudes compare lower.
+     *
+     * @param l    the magnitude, must be {@code >= 0}
+     * @param sign {@code 1} for positive values, {@code 0} for negative values
+     */
+    private static byte[] encode(long l, int sign) {
+        assert l >= 0;
+        assert sign == 0 || sign == 1;
+        int bits = 64 - Long.numberOfLeadingZeros(l);
+
+        int numBytes = (bits + 7) / 8; // between 0 and 8
+        byte[] encoded = new byte[1 + numBytes];
+        // encode the sign first to make sure positive values compare greater than negative values
+        // and then the number of bytes, to make sure that large values compare greater than low values
+        if (sign > 0) {
+            encoded[0] = (byte) ((sign << 4) | numBytes);
+        } else {
+            encoded[0] = (byte) ((sign << 4) | (8 - numBytes));
+        }
+        for (int b = 0; b < numBytes; ++b) {
+            // least significant byte goes last
+            int target = encoded.length - 1 - b;
+            if (sign == 1) {
+                encoded[target] = (byte) (l >>> (8 * b));
+            } else if (sign == 0) {
+                encoded[target] = (byte) (0xFF - ((l >>> (8 * b)) & 0xFF));
+            } else {
+                throw new AssertionError();
+            }
+        }
+        return encoded;
+    }
+
+}