Don't expose TextFieldMapper subfields (#64597)

TextFieldMapper can optionally index data into subfields for accelerated prefix and phrase queries. Currently, these subfields are implemented as FieldMappers in their own right, made available via TextFieldMapper's iterator() method and with their own standalone MappedFieldType objects. This has the disadvantage that these subfields are directly available for searching, and appear in APIs such as field caps. In addition, because exists queries are not implemented on them, an exists query against an object which contains a text field with one of the subfields enabled can throw an error (see #63585). This commit reworks the subfields so that they are no longer implemented as FieldMappers, and are no longer exposed to classes outside TextFieldMapper either as MappedFieldTypes or as FieldMappers. The parent TextFieldMapper handles indexing and analyzer registration, PhraseFieldType is removed entirely, and PrefixFieldType is retained as a private implementation for fast prefix queries but is unavailable for querying directly. Fixes #63585 Closes #63446
elastic · Nov 5, 2020 · cea93d1 · cea93d1
1 parent 33a38d4
commit cea93d1
Showing 4 changed files with 107 additions and 175 deletions.
diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
@@ -57,7 +57,6 @@
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Operations;
 import org.elasticsearch.Version;
-import org.elasticsearch.common.collect.Iterators;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.lucene.search.AutomatonQueries;
 import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
@@ -80,7 +79,6 @@
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
@@ -92,6 +90,7 @@ public class TextFieldMapper extends FieldMapper {
 
     public static final String CONTENT_TYPE = "text";
     private static final String FAST_PHRASE_SUFFIX = "._index_phrase";
+    private static final String FAST_PREFIX_SUFFIX = "._index_prefix";
 
     public static class Defaults {
         public static final double FIELDDATA_MIN_FREQUENCY = 0;
@@ -330,7 +329,7 @@ private TextFieldType buildFieldType(FieldType fieldType, ContentPath contentPat
             return ft;
         }
 
-        private PrefixFieldMapper buildPrefixMapper(ContentPath contentPath, FieldType fieldType, TextFieldType tft) {
+        private SubFieldInfo buildPrefixInfo(ContentPath contentPath, FieldType fieldType, TextFieldType tft) {
             if (indexPrefixes.get() == null) {
                 return null;
             }
@@ -358,16 +357,15 @@ private PrefixFieldMapper buildPrefixMapper(ContentPath contentPath, FieldType f
             if (fieldType.storeTermVectorOffsets()) {
                 pft.setStoreTermVectorOffsets(true);
             }
-            PrefixFieldType prefixFieldType = new PrefixFieldType(tft, fullName + "._index_prefix", indexPrefixes.get());
-            tft.setPrefixFieldType(prefixFieldType);
-            return new PrefixFieldMapper(pft, prefixFieldType, new PrefixWrappedAnalyzer(
+            tft.setIndexPrefixes(indexPrefixes.get().minChars, indexPrefixes.get().maxChars);
+            return new SubFieldInfo(fullName + "._index_prefix", pft, new PrefixWrappedAnalyzer(
                 analyzers.getIndexAnalyzer().analyzer(),
                 analyzers.positionIncrementGap.get(),
-                prefixFieldType.minChars,
-                prefixFieldType.maxChars));
+                indexPrefixes.get().minChars,
+                indexPrefixes.get().maxChars));
         }
 
-        private PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) {
+        private SubFieldInfo buildPhraseInfo(FieldType fieldType, TextFieldType parent) {
             if (indexPhrases.get() == false) {
                 return null;
             }
@@ -381,24 +379,24 @@ private PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType p
             parent.setIndexPhrases();
             PhraseWrappedAnalyzer a
                 = new PhraseWrappedAnalyzer(analyzers.getIndexAnalyzer().analyzer(), analyzers.positionIncrementGap.get());
-            return new PhraseFieldMapper(phraseFieldType, new PhraseFieldType(parent), a);
+            return new SubFieldInfo(parent.name() + FAST_PHRASE_SUFFIX, phraseFieldType, a);
         }
 
         public Map<String, NamedAnalyzer> indexAnalyzers(String name,
-                                                         PhraseFieldMapper phraseFieldMapper,
-                                                         PrefixFieldMapper prefixFieldMapper) {
+                                                         SubFieldInfo phraseFieldInfo,
+                                                         SubFieldInfo prefixFieldInfo) {
             Map<String, NamedAnalyzer> analyzers = new HashMap<>();
             NamedAnalyzer main = this.analyzers.getIndexAnalyzer();
             analyzers.put(name, main);
-            if (phraseFieldMapper != null) {
+            if (phraseFieldInfo != null) {
                 analyzers.put(
-                    phraseFieldMapper.name(),
-                    new NamedAnalyzer(main.name() + "_phrase", AnalyzerScope.INDEX, phraseFieldMapper.analyzer));
+                    phraseFieldInfo.field,
+                    new NamedAnalyzer(main.name() + "_phrase", AnalyzerScope.INDEX, phraseFieldInfo.analyzer));
             }
-            if (prefixFieldMapper != null) {
+            if (prefixFieldInfo != null) {
                 analyzers.put(
-                    prefixFieldMapper.name(),
-                    new NamedAnalyzer(main.name() + "_prefix", AnalyzerScope.INDEX, prefixFieldMapper.analyzer));
+                    prefixFieldInfo.field,
+                    new NamedAnalyzer(main.name() + "_prefix", AnalyzerScope.INDEX, prefixFieldInfo.analyzer));
             }
             return analyzers;
         }
@@ -407,12 +405,18 @@ public Map<String, NamedAnalyzer> indexAnalyzers(String name,
         public TextFieldMapper build(ContentPath contentPath) {
             FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors);
             TextFieldType tft = buildFieldType(fieldType, contentPath);
-            PhraseFieldMapper phraseFieldMapper = buildPhraseMapper(fieldType, tft);
-            PrefixFieldMapper prefixFieldMapper = buildPrefixMapper(contentPath, fieldType, tft);
+            SubFieldInfo phraseFieldInfo = buildPhraseInfo(fieldType, tft);
+            SubFieldInfo prefixFieldInfo = buildPrefixInfo(contentPath, fieldType, tft);
+            MultiFields multiFields = multiFieldsBuilder.build(this, contentPath);
+            for (Mapper mapper : multiFields) {
+                if (mapper.name().endsWith(FAST_PHRASE_SUFFIX) || mapper.name().endsWith(FAST_PREFIX_SUFFIX)) {
+                    throw new MapperParsingException("Cannot use reserved field name [" + mapper.name() + "]");
+                }
+            }
             return new TextFieldMapper(name, fieldType, tft,
-                indexAnalyzers(tft.name(), phraseFieldMapper, prefixFieldMapper),
-                prefixFieldMapper, phraseFieldMapper,
-                multiFieldsBuilder.build(this, contentPath), copyTo.build(), this);
+                indexAnalyzers(tft.name(), phraseFieldInfo, prefixFieldInfo),
+                prefixFieldInfo, phraseFieldInfo,
+                multiFields, copyTo.build(), this);
         }
     }
 
@@ -478,55 +482,22 @@ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComp
         }
     }
 
-    static final class PhraseFieldType extends StringFieldType {
-
-        final TextFieldType parent;
-
-        PhraseFieldType(TextFieldType parent) {
-            super(parent.name() + FAST_PHRASE_SUFFIX, true, false, false, parent.getTextSearchInfo(), Collections.emptyMap());
-            this.parent = parent;
-        }
-
-        @Override
-        public String typeName() {
-            return "phrase";
-        }
-
-        @Override
-        public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
-            // Because this internal field is modelled as a multi-field, SourceValueFetcher will look up its
-            // parent field in _source. So we don't need to use the parent field name here.
-            return SourceValueFetcher.toString(name(), context, format);
-        }
-
-        @Override
-        public Query existsQuery(QueryShardContext context) {
-            throw new UnsupportedOperationException();
-        }
-    }
-
-    static final class PrefixFieldType extends StringFieldType {
+    private static final class PrefixFieldType extends StringFieldType {
 
         final int minChars;
         final int maxChars;
         final TextFieldType parentField;
 
-        PrefixFieldType(TextFieldType parentField, String name, PrefixConfig config) {
-            this(parentField, name, config.minChars, config.maxChars);
-        }
-
-        PrefixFieldType(TextFieldType parentField, String name, int minChars, int maxChars) {
-            super(name, true, false, false, parentField.getTextSearchInfo(), Collections.emptyMap());
+        PrefixFieldType(TextFieldType parentField, int minChars, int maxChars) {
+            super(parentField.name() + FAST_PREFIX_SUFFIX, true, false, false, parentField.getTextSearchInfo(), Collections.emptyMap());
             this.minChars = minChars;
             this.maxChars = maxChars;
             this.parentField = parentField;
         }
 
         @Override
         public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
-            // Because this internal field is modelled as a multi-field, SourceValueFetcher will look up its
-            // parent field in _source. So we don't need to use the parent field name here.
-            return SourceValueFetcher.toString(name(), context, format);
+            throw new UnsupportedOperationException();
         }
 
         boolean accept(int length) {
@@ -590,67 +561,18 @@ public Query existsQuery(QueryShardContext context) {
         }
     }
 
-    private static final class PhraseFieldMapper extends FieldMapper {
+    private static final class SubFieldInfo {
 
         private final Analyzer analyzer;
         private final FieldType fieldType;
+        private final String field;
 
-        PhraseFieldMapper(FieldType fieldType, PhraseFieldType mappedFieldType, PhraseWrappedAnalyzer analyzer) {
-            super(mappedFieldType.name(), mappedFieldType, MultiFields.empty(), CopyTo.empty());
+        SubFieldInfo(String field, FieldType fieldType, Analyzer analyzer) {
             this.fieldType = fieldType;
             this.analyzer = analyzer;
+            this.field = field;
         }
 
-        @Override
-        protected void parseCreateField(ParseContext context) {
-            throw new UnsupportedOperationException();
-        }
-
-        @Override
-        public Builder getMergeBuilder() {
-            return null;
-        }
-
-        @Override
-        protected String contentType() {
-            return "phrase";
-        }
-    }
-
-    private static final class PrefixFieldMapper extends FieldMapper {
-
-        private final Analyzer analyzer;
-        private final FieldType fieldType;
-
-        protected PrefixFieldMapper(FieldType fieldType, PrefixFieldType mappedFieldType, Analyzer analyzer) {
-            super(mappedFieldType.name(), mappedFieldType, MultiFields.empty(), CopyTo.empty());
-            this.analyzer = analyzer;
-            this.fieldType = fieldType;
-        }
-
-        void addField(ParseContext context, String value) {
-            context.doc().add(new Field(fieldType().name(), value, fieldType));
-        }
-
-        @Override
-        protected void parseCreateField(ParseContext context) {
-            throw new UnsupportedOperationException();
-        }
-
-        @Override
-        public Builder getMergeBuilder() {
-            return null;
-        }
-
-        @Override
-        protected String contentType() {
-            return "prefix";
-        }
-
-        @Override
-        public String toString() {
-            return fieldType().toString();
-        }
     }
 
     public static class TextFieldType extends StringFieldType {
@@ -702,8 +624,8 @@ int fielddataMinSegmentSize() {
             return filter.minSegmentSize;
         }
 
-        void setPrefixFieldType(PrefixFieldType prefixFieldType) {
-            this.prefixFieldType = prefixFieldType;
+        void setIndexPrefixes(int minChars, int maxChars) {
+            this.prefixFieldType = new PrefixFieldType(this, minChars, maxChars);
         }
 
         void setIndexPhrases() {
@@ -862,14 +784,14 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S
 
     private final Builder builder;
     private final FieldType fieldType;
-    private final PrefixFieldMapper prefixFieldMapper;
-    private final PhraseFieldMapper phraseFieldMapper;
+    private final SubFieldInfo prefixFieldInfo;
+    private final SubFieldInfo phraseFieldInfo;
 
     protected TextFieldMapper(String simpleName, FieldType fieldType,
                               TextFieldType mappedFieldType,
                               Map<String, NamedAnalyzer> indexAnalyzers,
-                              PrefixFieldMapper prefixFieldMapper,
-                              PhraseFieldMapper phraseFieldMapper,
+                              SubFieldInfo prefixFieldInfo,
+                              SubFieldInfo phraseFieldInfo,
                               MultiFields multiFields, CopyTo copyTo, Builder builder) {
         super(simpleName, mappedFieldType, indexAnalyzers, multiFields, copyTo);
         assert mappedFieldType.getTextSearchInfo().isTokenized();
@@ -878,8 +800,8 @@ protected TextFieldMapper(String simpleName, FieldType fieldType,
             throw new IllegalArgumentException("Cannot enable fielddata on a [text] field that is not indexed: [" + name() + "]");
         }
         this.fieldType = fieldType;
-        this.prefixFieldMapper = prefixFieldMapper;
-        this.phraseFieldMapper = phraseFieldMapper;
+        this.prefixFieldInfo = prefixFieldInfo;
+        this.phraseFieldInfo = phraseFieldInfo;
         this.builder = builder;
     }
 
@@ -907,30 +829,15 @@ protected void parseCreateField(ParseContext context) throws IOException {
             if (fieldType.omitNorms()) {
                 createFieldNamesField(context);
             }
-            if (prefixFieldMapper != null) {
-                prefixFieldMapper.addField(context, value);
+            if (prefixFieldInfo != null) {
+                context.doc().add(new Field(prefixFieldInfo.field, value, prefixFieldInfo.fieldType));
             }
-            if (phraseFieldMapper != null) {
-                context.doc().add(new Field(phraseFieldMapper.fieldType().name(), value, phraseFieldMapper.fieldType));
+            if (phraseFieldInfo != null) {
+                context.doc().add(new Field(phraseFieldInfo.field, value, phraseFieldInfo.fieldType));
             }
         }
     }
 
-    @Override
-    public Iterator<Mapper> iterator() {
-        List<Mapper> subIterators = new ArrayList<>();
-        if (prefixFieldMapper != null) {
-            subIterators.add(prefixFieldMapper);
-        }
-        if (phraseFieldMapper != null) {
-            subIterators.add(phraseFieldMapper);
-        }
-        if (subIterators.size() == 0) {
-            return super.iterator();
-        }
-        return Iterators.concat(super.iterator(), subIterators.iterator());
-    }
-
     @Override
     protected String contentType() {
         return CONTENT_TYPE;
@@ -1014,10 +921,11 @@ public static Query createPhrasePrefixQuery(TokenStream stream, String field, in
         }
 
         if (terms.length == 1) {
-            Term[] newTerms = Arrays.stream(terms[0])
+            SynonymQuery.Builder sb = new SynonymQuery.Builder(prefixField);
+            Arrays.stream(terms[0])
                 .map(term -> new Term(prefixField, term.bytes()))
-                .toArray(Term[]::new);
-            return new SynonymQuery(newTerms);
+                .forEach(sb::addTerm);
+            return sb.build();
         }
 
         SpanNearQuery.Builder spanQuery = new SpanNearQuery.Builder(field, true);