Enhances exists queries to reduce need for _field_names (elastic#26930)

* Enhances exists queries to reduce need for `_field_names`

  Before this change we wrote the names of all the fields in a document to a `_field_names` field and then implemented exists queries as a term query on this field. The problem with this approach is that it bloats the index and also affects indexing performance.

  This change adds a new method `existsQuery()` to `MappedFieldType` which is implemented by each sub-class. For most field types, if doc values are available a `DocValuesFieldExistsQuery` is used, falling back to using `_field_names` if doc values are disabled. Note that only fields where no doc values are available are written to `_field_names`.

  Closes elastic#26770

* Addresses review comments
* Addresses more review comments
* implements existsQuery explicitly on every mapper
* Reinstates ability to perform term query on `_field_names`
* Added bwc depending on index created version
* Review Comments
* Skips tests that are not supported in 6.1.0

  These values will need to be changed after backporting this PR to 6.x.
jasontedor · Nov 1, 2017 · 99aca9c · 99aca9c
1 parent d805c41
commit 99aca9c
Show file tree

Hide file tree

Showing 45 changed files with 1,880 additions and 190 deletions.
diff --git a/core/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/BinaryFieldMapper.java
@@ -20,10 +20,14 @@
 package org.elasticsearch.index.mapper;
 
 import com.carrotsearch.hppc.ObjectArrayList;
+
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.ByteArrayDataOutput;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticsearchException;
@@ -126,6 +130,15 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
             return new BytesBinaryDVIndexFieldData.Builder();
         }
 
+        @Override
+        public Query existsQuery(QueryShardContext context) {
+            if (hasDocValues()) {
+                return new DocValuesFieldExistsQuery(name());
+            } else {
+                return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
+            }
+        }
+
         @Override
         public Query termQuery(Object value, QueryShardContext context) {
             throw new QueryShardException(context, "Binary fields do not support searching");
@@ -165,6 +178,11 @@ protected void parseCreateField(ParseContext context, List<IndexableField> field
             } else {
                 field.add(value);
             }
+        } else {
+            // Only add an entry to the field names field if the field is stored
+            // but has no doc values so exists query will work on a field with
+            // no doc values
+            createFieldNamesField(context, fields);
         }
 
     }

diff --git a/core/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java
@@ -23,7 +23,10 @@
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.Version;
@@ -136,6 +139,15 @@ public String typeName() {
             return CONTENT_TYPE;
         }
 
+        @Override
+        public Query existsQuery(QueryShardContext context) {
+            if (hasDocValues()) {
+                return new DocValuesFieldExistsQuery(name());
+            } else {
+                return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
+            }
+        }
+
         @Override
         public Boolean nullValue() {
             return (Boolean)super.nullValue();
@@ -253,6 +265,8 @@ protected void parseCreateField(ParseContext context, List<IndexableField> field
         }
         if (fieldType().hasDocValues()) {
             fields.add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0));
+        } else {
+            createFieldNamesField(context, fields);
         }
     }
 

diff --git a/core/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/CompletionFieldMapper.java
@@ -21,6 +21,8 @@
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.suggest.document.Completion50PostingsFormat;
 import org.apache.lucene.search.suggest.document.CompletionAnalyzer;
 import org.apache.lucene.search.suggest.document.CompletionQuery;
@@ -40,11 +42,13 @@
 import org.elasticsearch.common.xcontent.XContentParser.Token;
 import org.elasticsearch.index.analysis.AnalyzerScope;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.query.QueryShardContext;
 import org.elasticsearch.search.suggest.completion.CompletionSuggester;
 import org.elasticsearch.search.suggest.completion.context.ContextMapping;
 import org.elasticsearch.search.suggest.completion.context.ContextMappings;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
@@ -257,6 +261,11 @@ public static synchronized PostingsFormat postingsFormat() {
             return postingsFormat;
         }
 
+        @Override
+        public Query existsQuery(QueryShardContext context) {
+            return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
+        }
+
         /**
          * Completion prefix query
          */
@@ -456,6 +465,11 @@ public Mapper parse(ParseContext context) throws IOException {
                 context.doc().add(new SuggestField(fieldType().name(), input, metaData.weight));
             }
         }
+        List<IndexableField> fields = new ArrayList<>(1);
+        createFieldNamesField(context, fields);
+        for (IndexableField field : fields) {
+            context.doc().add(field);
+        }
         multiFields.parse(this, context);
         return null;
     }

diff --git a/core/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java
@@ -26,9 +26,12 @@
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.PointValues;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BoostQuery;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.IndexOrDocValuesQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.Version;
 import org.elasticsearch.common.Explicit;
@@ -245,6 +248,15 @@ long parse(String value) {
             return dateTimeFormatter().parser().parseMillis(value);
         }
 
+        @Override
+        public Query existsQuery(QueryShardContext context) {
+            if (hasDocValues()) {
+                return new DocValuesFieldExistsQuery(name());
+            } else {
+                return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
+            }
+        }
+
         @Override
         public Query termQuery(Object value, @Nullable QueryShardContext context) {
             Query query = rangeQuery(value, value, true, true, ShapeRelation.INTERSECTS, null, null, context);
@@ -451,6 +463,8 @@ protected void parseCreateField(ParseContext context, List<IndexableField> field
         }
         if (fieldType().hasDocValues()) {
             fields.add(new SortedNumericDocValuesField(fieldType().name(), timestamp));
+        } else if (fieldType().stored() || fieldType().indexOptions() != IndexOptions.NONE) {
+            createFieldNamesField(context, fields);
         }
         if (fieldType().stored()) {
             fields.add(new StoredField(fieldType().name(), timestamp));

diff --git a/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java
@@ -22,6 +22,7 @@
 import com.carrotsearch.hppc.cursors.ObjectCursor;
 import com.carrotsearch.hppc.cursors.ObjectObjectCursor;
 
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
@@ -33,6 +34,7 @@
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
+import org.elasticsearch.index.mapper.FieldNamesFieldMapper.FieldNamesFieldType;
 import org.elasticsearch.index.similarity.SimilarityProvider;
 import org.elasticsearch.index.similarity.SimilarityService;
 
@@ -285,6 +287,16 @@ public Mapper parse(ParseContext context) throws IOException {
      */
     protected abstract void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException;
 
+    protected void createFieldNamesField(ParseContext context, List<IndexableField> fields) {
+        FieldNamesFieldType fieldNamesFieldType = (FieldNamesFieldMapper.FieldNamesFieldType) context.docMapper()
+                .metadataMapper(FieldNamesFieldMapper.class).fieldType();
+        if (fieldNamesFieldType != null && fieldNamesFieldType.isEnabled()) {
+            for (String fieldName : FieldNamesFieldMapper.extractFieldNames(fieldType().name())) {
+                fields.add(new Field(FieldNamesFieldMapper.NAME, fieldName, fieldNamesFieldType));
+            }
+        }
+    }
+
     @Override
     public Iterator<Mapper> iterator() {
         return multiFields.iterator();

diff --git a/core/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/FieldNamesFieldMapper.java
@@ -23,6 +23,10 @@
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.search.Query;
+import org.elasticsearch.Version;
+import org.elasticsearch.cluster.metadata.IndexMetaData;
+import org.elasticsearch.common.logging.DeprecationLogger;
+import org.elasticsearch.common.logging.ESLoggerFactory;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -44,6 +48,9 @@
  */
 public class FieldNamesFieldMapper extends MetadataFieldMapper {
 
+    private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(
+            ESLoggerFactory.getLogger(FieldNamesFieldMapper.class));
+
     public static final String NAME = "_field_names";
 
     public static final String CONTENT_TYPE = "_field_names";
@@ -178,11 +185,18 @@ public boolean isEnabled() {
             return enabled;
         }
 
+        @Override
+        public Query existsQuery(QueryShardContext context) {
+            throw new UnsupportedOperationException("Cannot run exists query on _field_names");
+        }
+
         @Override
         public Query termQuery(Object value, QueryShardContext context) {
             if (isEnabled() == false) {
                 throw new IllegalStateException("Cannot run [exists] queries if the [_field_names] field is disabled");
             }
+            DEPRECATION_LOGGER.deprecated(
+                    "terms query on the _field_names field is deprecated and will be removed, use exists query instead");
             return super.termQuery(value, context);
         }
     }
@@ -206,12 +220,14 @@ public void preParse(ParseContext context) throws IOException {
 
     @Override
     public void postParse(ParseContext context) throws IOException {
-        super.parse(context);
+        if (context.indexSettings().getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).before(Version.V_6_1_0)) {
+            super.parse(context);
+        }
     }
 
     @Override
     public Mapper parse(ParseContext context) throws IOException {
-        // we parse in post parse
+        // Adding values to the _field_names field is handled by the mappers for each field type
         return null;
     }
 

diff --git a/core/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java
@@ -23,7 +23,10 @@
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.common.Explicit;
 import org.elasticsearch.common.geo.GeoPoint;
@@ -37,6 +40,7 @@
 import org.elasticsearch.index.query.QueryShardException;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -180,6 +184,15 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName) {
             return new AbstractLatLonPointDVIndexFieldData.Builder();
         }
 
+        @Override
+        public Query existsQuery(QueryShardContext context) {
+            if (hasDocValues()) {
+                return new DocValuesFieldExistsQuery(name());
+            } else {
+                return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
+            }
+        }
+
         @Override
         public Query termQuery(Object value, QueryShardContext context) {
             throw new QueryShardException(context, "Geo fields do not support exact searching, use dedicated geo queries instead: ["
@@ -207,6 +220,12 @@ protected void parse(ParseContext context, GeoPoint point) throws IOException {
         }
         if (fieldType.hasDocValues()) {
             context.doc().add(new LatLonDocValuesField(fieldType().name(), point.lat(), point.lon()));
+        } else if (fieldType().stored() || fieldType().indexOptions() != IndexOptions.NONE) {
+            List<IndexableField> fields = new ArrayList<>(1);
+            createFieldNamesField(context, fields);
+            for (IndexableField field : fields) {
+                context.doc().add(field);
+            }
         }
         // if the mapping contains multifields then use the geohash string
         if (multiFields.iterator().hasNext()) {

diff --git a/core/src/main/java/org/elasticsearch/index/mapper/GeoShapeFieldMapper.java b/core/src/main/java/org/elasticsearch/index/mapper/GeoShapeFieldMapper.java
@@ -18,17 +18,20 @@
  */
 package org.elasticsearch.index.mapper;
 
-import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocValuesFieldExistsQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
 import org.apache.lucene.spatial.prefix.RecursivePrefixTreeStrategy;
 import org.apache.lucene.spatial.prefix.TermQueryPrefixTreeStrategy;
 import org.apache.lucene.spatial.prefix.tree.GeohashPrefixTree;
 import org.apache.lucene.spatial.prefix.tree.PackedQuadPrefixTree;
 import org.apache.lucene.spatial.prefix.tree.QuadPrefixTree;
 import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
+import org.elasticsearch.Version;
 import org.elasticsearch.common.Explicit;
 import org.elasticsearch.common.geo.GeoUtils;
 import org.elasticsearch.common.geo.SpatialStrategy;
@@ -44,6 +47,8 @@
 import org.locationtech.spatial4j.shape.jts.JtsGeometry;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -125,6 +130,11 @@ public Builder coerce(boolean coerce) {
             return builder;
         }
 
+        @Override
+        protected boolean defaultDocValues(Version indexCreated) {
+            return false;
+        }
+
         protected Explicit<Boolean> coerce(BuilderContext context) {
             if (coerce != null) {
                 return new Explicit<>(coerce, true);
@@ -406,6 +416,11 @@ public PrefixTreeStrategy resolveStrategy(String strategyName) {
             throw new IllegalArgumentException("Unknown prefix tree strategy [" + strategyName + "]");
         }
 
+        @Override
+        public Query existsQuery(QueryShardContext context) {
+            return new TermQuery(new Term(FieldNamesFieldMapper.NAME, name()));
+        }
+
         @Override
         public Query termQuery(Object value, QueryShardContext context) {
             throw new QueryShardException(context, "Geo fields do not support exact searching, use dedicated geo queries instead");
@@ -440,11 +455,9 @@ public Mapper parse(ParseContext context) throws IOException {
                 throw new MapperParsingException("[{" + fieldType().name() + "}] is configured for points only but a " +
                         ((shape instanceof JtsGeometry) ? ((JtsGeometry)shape).getGeom().getGeometryType() : shape.getClass()) + " was found");
             }
-            Field[] fields = fieldType().defaultStrategy().createIndexableFields(shape);
-            if (fields == null || fields.length == 0) {
-                return null;
-            }
-            for (Field field : fields) {
+            List<IndexableField> fields = new ArrayList<>(Arrays.asList(fieldType().defaultStrategy().createIndexableFields(shape)));
+            createFieldNamesField(context, fields);
+            for (IndexableField field : fields) {
                 context.doc().add(field);
             }
         } catch (Exception e) {