Implement runtime script ips (#60533)

This implements the `ip` typed runtime fields. They share a fair bit with `string` runtime fields but we represent them as a `BytesRef` containing 128 bits so that the comparisons all happen in the same way as Lucene's `InetAddressPoint`.
elastic · Aug 5, 2020 · e98dfcc · e98dfcc
1 parent 4540211
commit e98dfcc
Show file tree

Hide file tree

Showing 28 changed files with 1,909 additions and 65 deletions.
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java
@@ -54,6 +54,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.function.BiFunction;
 import java.util.function.Supplier;
 
 /** A {@link FieldMapper} for ip addresses. */
@@ -155,7 +156,7 @@ public String typeName() {
             return CONTENT_TYPE;
         }
 
-        private InetAddress parse(Object value) {
+        private static InetAddress parse(Object value) {
             if (value instanceof InetAddress) {
                 return (InetAddress) value;
             } else {
@@ -221,6 +222,26 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
         @Override
         public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) {
             failIfNotIndexed();
+            return rangeQuery( // bound handling is done by the static overload; this only supplies the query builder
+                lowerTerm,
+                upperTerm,
+                includeLower,
+                includeUpper,
+                (lower, upper) -> InetAddressPoint.newRangeQuery(name(), lower, upper) // point range query on this field
+            );
+        }
+
+        /**
+         * Processes query bounds into {@link InetAddress}es and delegates to the
+         * provided {@code builder} to build a range query.
+         */
+        public static Query rangeQuery(
+            Object lowerTerm,
+            Object upperTerm,
+            boolean includeLower,
+            boolean includeUpper,
+            BiFunction<InetAddress, InetAddress, Query> builder
+        ) {
             InetAddress lower;
             if (lowerTerm == null) {
                 lower = InetAddressPoint.MIN_VALUE;
@@ -247,7 +268,7 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower
                 }
             }
 
-            return InetAddressPoint.newRangeQuery(name(), lower, upper);
+            return builder.apply(lower, upper);
         }
 
         public static final class IpScriptDocValues extends ScriptDocValues<String> {

diff --git a/...ntime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/IpScriptFieldScript.java b/...ntime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/IpScriptFieldScript.java
@@ -0,0 +1,113 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.runtimefields;
+
+import org.apache.lucene.document.InetAddressPoint;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.network.InetAddresses;
+import org.elasticsearch.index.mapper.IpFieldMapper;
+import org.elasticsearch.painless.spi.Whitelist;
+import org.elasticsearch.painless.spi.WhitelistLoader;
+import org.elasticsearch.script.ScriptContext;
+import org.elasticsearch.script.ScriptFactory;
+import org.elasticsearch.search.lookup.SearchLookup;
+
+import java.io.IOException;
+import java.net.Inet4Address;
+import java.net.Inet6Address;
+import java.net.InetAddress;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Script producing IP addresses. Unlike the other {@linkplain AbstractScriptFieldScript}s
+ * which deal with their native java objects this converts its values to the same format
+ * that Lucene uses to store its fields, {@link InetAddressPoint}. There are a few compelling
+ * reasons to do this:
+ * <ul>
+ * <li>{@link Inet4Address}es and {@link Inet6Address}es are not comparable with one another.
+ * That is correct in some contexts, but not for our queries. Our queries must consider the
+ * IPv4 address equal to the address that it maps to in IPv6 (<a href="https://tools.ietf.org/html/rfc4291">rfc4291</a>).
+ * <li>{@link InetAddress}es are not ordered, but we need to implement range queries with
+ * the same ordering as {@link IpFieldMapper}. That also uses {@link InetAddressPoint}
+ * so it saves us a lot of trouble to use the same representation.
+ * </ul>
+ */
+public abstract class IpScriptFieldScript extends AbstractScriptFieldScript {
+    public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("ip_script_field", Factory.class);
+
+    static List<Whitelist> whitelist() { // whitelist for this script context, loaded from ip_whitelist.txt
+        return List.of(WhitelistLoader.loadFromResourceFiles(RuntimeFieldsPainlessExtension.class, "ip_whitelist.txt"));
+    }
+
+    public static final String[] PARAMETERS = {}; // NOTE(review): presumably the names of execute()'s parameters (none) - confirm
+
+    public interface Factory extends ScriptFactory { // script-level factory; the type registered with CONTEXT
+        LeafFactory newFactory(Map<String, Object> params, SearchLookup searchLookup);
+    }
+
+    public interface LeafFactory { // builds a script instance for the given LeafReaderContext
+        IpScriptFieldScript newInstance(LeafReaderContext ctx) throws IOException;
+    }
+
+    private BytesRef[] values = new BytesRef[1]; // reused across documents and grown on demand by collectValue
+    private int count; // number of leading entries of values that belong to the current document
+
+    public IpScriptFieldScript(Map<String, Object> params, SearchLookup searchLookup, LeafReaderContext ctx) {
+        super(params, searchLookup, ctx);
+    }
+
+    /**
+     * Execute the script for the provided {@code docId}.
+     */
+    public final void runForDoc(int docId) {
+        count = 0; // forget the previous document's values; the backing array itself is reused
+        setDocument(docId);
+        execute(); // NOTE(review): not declared in this class; presumably the script body, emitting via StringValue - confirm
+    }
+
+    /**
+     * Values from the last time {@link #runForDoc(int)} was called. This array
+     * is mutable and will change with the next call of {@link #runForDoc(int)}.
+     * It is also oversized and will contain garbage at all indices at and
+     * above {@link #count()}.
+     * <p>
+     * All values are IPv6 addresses so they are 16 bytes. IPv4 addresses are
+     * encoded as described by <a href="https://tools.ietf.org/html/rfc4291">rfc4291</a>.
+     */
+    public final BytesRef[] values() {
+        return values;
+    }
+
+    /**
+     * The number of results produced the last time {@link #runForDoc(int)} was called.
+     */
+    public final int count() {
+        return count;
+    }
+
+    private void collectValue(String v) { // parses v as an IP address and appends its encoded form to values
+        if (values.length < count + 1) {
+            values = ArrayUtil.grow(values, count + 1); // make room for at least one more entry
+        }
+        values[count++] = new BytesRef(InetAddressPoint.encode(InetAddresses.forString(v)));
+    }
+
+    public static class StringValue { // callback object that adds an IP address, given as a string, to one script's values
+        private final IpScriptFieldScript script;
+
+        public StringValue(IpScriptFieldScript script) {
+            this.script = script;
+        }
+
+        public void stringValue(String v) { // NOTE(review): presumably exposed to scripts through ip_whitelist.txt - confirm
+            script.collectValue(v);
+        }
+    }
+}
diff --git a/...gin/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/RuntimeFields.java b/...gin/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/RuntimeFields.java
@@ -29,6 +29,7 @@ public List<ScriptContext<?>> getContexts() {
         return List.of(
             DateScriptFieldScript.CONTEXT,
             DoubleScriptFieldScript.CONTEXT,
+            IpScriptFieldScript.CONTEXT,
             LongScriptFieldScript.CONTEXT,
             StringScriptFieldScript.CONTEXT
         );

diff --git a/...s/src/main/java/org/elasticsearch/xpack/runtimefields/RuntimeFieldsPainlessExtension.java b/...s/src/main/java/org/elasticsearch/xpack/runtimefields/RuntimeFieldsPainlessExtension.java
@@ -19,6 +19,7 @@ public Map<ScriptContext<?>, List<Whitelist>> getContextWhitelists() {
         return Map.ofEntries(
             Map.entry(DateScriptFieldScript.CONTEXT, DateScriptFieldScript.whitelist()),
             Map.entry(DoubleScriptFieldScript.CONTEXT, DoubleScriptFieldScript.whitelist()),
+            Map.entry(IpScriptFieldScript.CONTEXT, IpScriptFieldScript.whitelist()),
             Map.entry(LongScriptFieldScript.CONTEXT, LongScriptFieldScript.whitelist()),
             Map.entry(StringScriptFieldScript.CONTEXT, StringScriptFieldScript.whitelist())
         );

diff --git a/.../src/main/java/org/elasticsearch/xpack/runtimefields/fielddata/ScriptBinaryFieldData.java b/.../src/main/java/org/elasticsearch/xpack/runtimefields/fielddata/ScriptBinaryFieldData.java
@@ -11,58 +11,25 @@
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.index.fielddata.IndexFieldData;
-import org.elasticsearch.index.fielddata.IndexFieldDataCache;
 import org.elasticsearch.index.fielddata.LeafFieldData;
-import org.elasticsearch.index.fielddata.ScriptDocValues;
-import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
 import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
-import org.elasticsearch.index.mapper.MapperService;
-import org.elasticsearch.indices.breaker.CircuitBreakerService;
 import org.elasticsearch.search.DocValueFormat;
 import org.elasticsearch.search.MultiValueMode;
-import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
-import org.elasticsearch.search.aggregations.support.ValuesSourceType;
 import org.elasticsearch.search.sort.BucketedSort;
 import org.elasticsearch.search.sort.SortOrder;
-import org.elasticsearch.xpack.runtimefields.StringScriptFieldScript;
-
-import java.io.IOException;
-
-public final class ScriptBinaryFieldData implements IndexFieldData<ScriptBinaryFieldData.ScriptBinaryLeafFieldData> {
-
-    public static class Builder implements IndexFieldData.Builder {
-        private final String name;
-        private final StringScriptFieldScript.LeafFactory leafFactory;
-
-        public Builder(String name, StringScriptFieldScript.LeafFactory leafFactory) {
-            this.name = name;
-            this.leafFactory = leafFactory;
-        }
-
-        @Override
-        public ScriptBinaryFieldData build(IndexFieldDataCache cache, CircuitBreakerService breakerService, MapperService mapperService) {
-            return new ScriptBinaryFieldData(name, leafFactory);
-        }
-    }
 
+public abstract class ScriptBinaryFieldData implements IndexFieldData<ScriptBinaryFieldData.ScriptBinaryLeafFieldData> {
     private final String fieldName;
-    private final StringScriptFieldScript.LeafFactory leafFactory;
 
-    private ScriptBinaryFieldData(String fieldName, StringScriptFieldScript.LeafFactory leafFactory) {
+    protected ScriptBinaryFieldData(String fieldName) {
         this.fieldName = fieldName;
-        this.leafFactory = leafFactory;
     }
 
     @Override
     public String getFieldName() {
         return fieldName;
     }
 
-    @Override
-    public ValuesSourceType getValuesSourceType() {
-        return CoreValuesSourceType.BYTES;
-    }
-
     @Override
     public ScriptBinaryLeafFieldData load(LeafReaderContext context) {
         try {
@@ -72,11 +39,6 @@ public ScriptBinaryLeafFieldData load(LeafReaderContext context) {
         }
     }
 
-    @Override
-    public ScriptBinaryLeafFieldData loadDirect(LeafReaderContext context) throws IOException {
-        return new ScriptBinaryLeafFieldData(new ScriptBinaryDocValues(leafFactory.newInstance(context)));
-    }
-
     @Override
     public SortField sortField(Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) {
         final XFieldComparatorSource source = new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
@@ -97,23 +59,7 @@ public BucketedSort newBucketedSort(
         throw new IllegalArgumentException("only supported on numeric fields");
     }
 
-    public static class ScriptBinaryLeafFieldData implements LeafFieldData {
-        private final ScriptBinaryDocValues scriptBinaryDocValues;
-
-        ScriptBinaryLeafFieldData(ScriptBinaryDocValues scriptBinaryDocValues) {
-            this.scriptBinaryDocValues = scriptBinaryDocValues;
-        }
-
-        @Override
-        public ScriptDocValues<?> getScriptValues() {
-            return new ScriptDocValues.Strings(getBytesValues());
-        }
-
-        @Override
-        public SortedBinaryDocValues getBytesValues() {
-            return scriptBinaryDocValues;
-        }
-
+    public abstract class ScriptBinaryLeafFieldData implements LeafFieldData {
         @Override
         public long ramBytesUsed() {
             return 0;

diff --git a/...elds/src/main/java/org/elasticsearch/xpack/runtimefields/fielddata/ScriptIpDocValues.java b/...elds/src/main/java/org/elasticsearch/xpack/runtimefields/fielddata/ScriptIpDocValues.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License;
+ * you may not use this file except in compliance with the Elastic License.
+ */
+
+package org.elasticsearch.xpack.runtimefields.fielddata;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
+import org.elasticsearch.xpack.runtimefields.IpScriptFieldScript;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+public final class ScriptIpDocValues extends SortedBinaryDocValues { // doc values backed by one IpScriptFieldScript's output
+    private final IpScriptFieldScript script; // re-run for every document passed to advanceExact
+    private int cursor; // index into script.values() of the entry the next nextValue() call returns
+
+    ScriptIpDocValues(IpScriptFieldScript script) {
+        this.script = script;
+    }
+
+    @Override
+    public boolean advanceExact(int docId) {
+        script.runForDoc(docId); // afterwards script.values() and script.count() describe this document
+        if (script.count() == 0) {
+            return false; // the script produced no values for this document
+        }
+        Arrays.sort(script.values(), 0, script.count()); // sort only the filled prefix; entries at and above count() are garbage
+        cursor = 0;
+        return true;
+    }
+
+    @Override
+    public BytesRef nextValue() throws IOException {
+        return script.values()[cursor++]; // no bounds check against docValueCount() is performed here
+    }
+
+    @Override
+    public int docValueCount() {
+        return script.count();
+    }
+}