Skip to content

Commit

Permalink
Don't expose TextFieldMapper subfields (#64597)
Browse files Browse the repository at this point in the history
TextFieldMapper can optionally index data into subfields for accelerated
prefix and phrase queries. Currently, these subfields are implemented
as FieldMappers in their own right, made available via TextFieldMapper's
iterator() method and with their own standalone MappedFieldType objects.

This has the disadvantage that these subfields are directly available for
searching, and appear in APIs such as field caps. In addition, because
exists queries are not implemented on them, an exists query against an
object which contains a text field with one of the subfields enabled can
throw an error (see #63585).

This commit reworks the subfields so that they are no longer implemented
as FieldMappers, and are no longer exposed to classes outside
TextFieldMapper either as MappedFieldTypes or as FieldMappers. The
parent TextFieldMapper handles indexing and analyzer registration,
PhraseFieldType is removed entirely, and PrefixFieldType is retained as
a private implementation for fast prefix queries but is unavailable for
querying directly.

Fixes #63585
Closes #63446
  • Loading branch information
romseygeek authored Nov 5, 2020
1 parent 33a38d4 commit cea93d1
Showing 4 changed files with 107 additions and 175 deletions.
194 changes: 51 additions & 143 deletions server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java
Original file line number Diff line number Diff line change
@@ -57,7 +57,6 @@
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.elasticsearch.Version;
import org.elasticsearch.common.collect.Iterators;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
@@ -80,7 +79,6 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -92,6 +90,7 @@ public class TextFieldMapper extends FieldMapper {

public static final String CONTENT_TYPE = "text";
private static final String FAST_PHRASE_SUFFIX = "._index_phrase";
private static final String FAST_PREFIX_SUFFIX = "._index_prefix";

public static class Defaults {
public static final double FIELDDATA_MIN_FREQUENCY = 0;
@@ -330,7 +329,7 @@ private TextFieldType buildFieldType(FieldType fieldType, ContentPath contentPat
return ft;
}

private PrefixFieldMapper buildPrefixMapper(ContentPath contentPath, FieldType fieldType, TextFieldType tft) {
private SubFieldInfo buildPrefixInfo(ContentPath contentPath, FieldType fieldType, TextFieldType tft) {
if (indexPrefixes.get() == null) {
return null;
}
@@ -358,16 +357,15 @@ private PrefixFieldMapper buildPrefixMapper(ContentPath contentPath, FieldType f
if (fieldType.storeTermVectorOffsets()) {
pft.setStoreTermVectorOffsets(true);
}
PrefixFieldType prefixFieldType = new PrefixFieldType(tft, fullName + "._index_prefix", indexPrefixes.get());
tft.setPrefixFieldType(prefixFieldType);
return new PrefixFieldMapper(pft, prefixFieldType, new PrefixWrappedAnalyzer(
tft.setIndexPrefixes(indexPrefixes.get().minChars, indexPrefixes.get().maxChars);
return new SubFieldInfo(fullName + "._index_prefix", pft, new PrefixWrappedAnalyzer(
analyzers.getIndexAnalyzer().analyzer(),
analyzers.positionIncrementGap.get(),
prefixFieldType.minChars,
prefixFieldType.maxChars));
indexPrefixes.get().minChars,
indexPrefixes.get().maxChars));
}

private PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) {
private SubFieldInfo buildPhraseInfo(FieldType fieldType, TextFieldType parent) {
if (indexPhrases.get() == false) {
return null;
}
@@ -381,24 +379,24 @@ private PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType p
parent.setIndexPhrases();
PhraseWrappedAnalyzer a
= new PhraseWrappedAnalyzer(analyzers.getIndexAnalyzer().analyzer(), analyzers.positionIncrementGap.get());
return new PhraseFieldMapper(phraseFieldType, new PhraseFieldType(parent), a);
return new SubFieldInfo(parent.name() + FAST_PHRASE_SUFFIX, phraseFieldType, a);
}

public Map<String, NamedAnalyzer> indexAnalyzers(String name,
PhraseFieldMapper phraseFieldMapper,
PrefixFieldMapper prefixFieldMapper) {
SubFieldInfo phraseFieldInfo,
SubFieldInfo prefixFieldInfo) {
Map<String, NamedAnalyzer> analyzers = new HashMap<>();
NamedAnalyzer main = this.analyzers.getIndexAnalyzer();
analyzers.put(name, main);
if (phraseFieldMapper != null) {
if (phraseFieldInfo != null) {
analyzers.put(
phraseFieldMapper.name(),
new NamedAnalyzer(main.name() + "_phrase", AnalyzerScope.INDEX, phraseFieldMapper.analyzer));
phraseFieldInfo.field,
new NamedAnalyzer(main.name() + "_phrase", AnalyzerScope.INDEX, phraseFieldInfo.analyzer));
}
if (prefixFieldMapper != null) {
if (prefixFieldInfo != null) {
analyzers.put(
prefixFieldMapper.name(),
new NamedAnalyzer(main.name() + "_prefix", AnalyzerScope.INDEX, prefixFieldMapper.analyzer));
prefixFieldInfo.field,
new NamedAnalyzer(main.name() + "_prefix", AnalyzerScope.INDEX, prefixFieldInfo.analyzer));
}
return analyzers;
}
@@ -407,12 +405,18 @@ public Map<String, NamedAnalyzer> indexAnalyzers(String name,
public TextFieldMapper build(ContentPath contentPath) {
FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors);
TextFieldType tft = buildFieldType(fieldType, contentPath);
PhraseFieldMapper phraseFieldMapper = buildPhraseMapper(fieldType, tft);
PrefixFieldMapper prefixFieldMapper = buildPrefixMapper(contentPath, fieldType, tft);
SubFieldInfo phraseFieldInfo = buildPhraseInfo(fieldType, tft);
SubFieldInfo prefixFieldInfo = buildPrefixInfo(contentPath, fieldType, tft);
MultiFields multiFields = multiFieldsBuilder.build(this, contentPath);
for (Mapper mapper : multiFields) {
if (mapper.name().endsWith(FAST_PHRASE_SUFFIX) || mapper.name().endsWith(FAST_PREFIX_SUFFIX)) {
throw new MapperParsingException("Cannot use reserved field name [" + mapper.name() + "]");
}
}
return new TextFieldMapper(name, fieldType, tft,
indexAnalyzers(tft.name(), phraseFieldMapper, prefixFieldMapper),
prefixFieldMapper, phraseFieldMapper,
multiFieldsBuilder.build(this, contentPath), copyTo.build(), this);
indexAnalyzers(tft.name(), phraseFieldInfo, prefixFieldInfo),
prefixFieldInfo, phraseFieldInfo,
multiFields, copyTo.build(), this);
}
}

@@ -478,55 +482,22 @@ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComp
}
}

static final class PhraseFieldType extends StringFieldType {

final TextFieldType parent;

PhraseFieldType(TextFieldType parent) {
super(parent.name() + FAST_PHRASE_SUFFIX, true, false, false, parent.getTextSearchInfo(), Collections.emptyMap());
this.parent = parent;
}

@Override
public String typeName() {
return "phrase";
}

@Override
public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
// Because this internal field is modelled as a multi-field, SourceValueFetcher will look up its
// parent field in _source. So we don't need to use the parent field name here.
return SourceValueFetcher.toString(name(), context, format);
}

@Override
public Query existsQuery(QueryShardContext context) {
throw new UnsupportedOperationException();
}
}

static final class PrefixFieldType extends StringFieldType {
private static final class PrefixFieldType extends StringFieldType {

final int minChars;
final int maxChars;
final TextFieldType parentField;

PrefixFieldType(TextFieldType parentField, String name, PrefixConfig config) {
this(parentField, name, config.minChars, config.maxChars);
}

PrefixFieldType(TextFieldType parentField, String name, int minChars, int maxChars) {
super(name, true, false, false, parentField.getTextSearchInfo(), Collections.emptyMap());
PrefixFieldType(TextFieldType parentField, int minChars, int maxChars) {
super(parentField.name() + FAST_PREFIX_SUFFIX, true, false, false, parentField.getTextSearchInfo(), Collections.emptyMap());
this.minChars = minChars;
this.maxChars = maxChars;
this.parentField = parentField;
}

@Override
public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
// Because this internal field is modelled as a multi-field, SourceValueFetcher will look up its
// parent field in _source. So we don't need to use the parent field name here.
return SourceValueFetcher.toString(name(), context, format);
throw new UnsupportedOperationException();
}

boolean accept(int length) {
@@ -590,67 +561,18 @@ public Query existsQuery(QueryShardContext context) {
}
}

private static final class PhraseFieldMapper extends FieldMapper {
private static final class SubFieldInfo {

private final Analyzer analyzer;
private final FieldType fieldType;
private final String field;

PhraseFieldMapper(FieldType fieldType, PhraseFieldType mappedFieldType, PhraseWrappedAnalyzer analyzer) {
super(mappedFieldType.name(), mappedFieldType, MultiFields.empty(), CopyTo.empty());
SubFieldInfo(String field, FieldType fieldType, Analyzer analyzer) {
this.fieldType = fieldType;
this.analyzer = analyzer;
this.field = field;
}

@Override
protected void parseCreateField(ParseContext context) {
throw new UnsupportedOperationException();
}

@Override
public Builder getMergeBuilder() {
return null;
}

@Override
protected String contentType() {
return "phrase";
}
}

private static final class PrefixFieldMapper extends FieldMapper {

private final Analyzer analyzer;
private final FieldType fieldType;

protected PrefixFieldMapper(FieldType fieldType, PrefixFieldType mappedFieldType, Analyzer analyzer) {
super(mappedFieldType.name(), mappedFieldType, MultiFields.empty(), CopyTo.empty());
this.analyzer = analyzer;
this.fieldType = fieldType;
}

void addField(ParseContext context, String value) {
context.doc().add(new Field(fieldType().name(), value, fieldType));
}

@Override
protected void parseCreateField(ParseContext context) {
throw new UnsupportedOperationException();
}

@Override
public Builder getMergeBuilder() {
return null;
}

@Override
protected String contentType() {
return "prefix";
}

@Override
public String toString() {
return fieldType().toString();
}
}

public static class TextFieldType extends StringFieldType {
@@ -702,8 +624,8 @@ int fielddataMinSegmentSize() {
return filter.minSegmentSize;
}

void setPrefixFieldType(PrefixFieldType prefixFieldType) {
this.prefixFieldType = prefixFieldType;
void setIndexPrefixes(int minChars, int maxChars) {
this.prefixFieldType = new PrefixFieldType(this, minChars, maxChars);
}

void setIndexPhrases() {
@@ -862,14 +784,14 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S

private final Builder builder;
private final FieldType fieldType;
private final PrefixFieldMapper prefixFieldMapper;
private final PhraseFieldMapper phraseFieldMapper;
private final SubFieldInfo prefixFieldInfo;
private final SubFieldInfo phraseFieldInfo;

protected TextFieldMapper(String simpleName, FieldType fieldType,
TextFieldType mappedFieldType,
Map<String, NamedAnalyzer> indexAnalyzers,
PrefixFieldMapper prefixFieldMapper,
PhraseFieldMapper phraseFieldMapper,
SubFieldInfo prefixFieldInfo,
SubFieldInfo phraseFieldInfo,
MultiFields multiFields, CopyTo copyTo, Builder builder) {
super(simpleName, mappedFieldType, indexAnalyzers, multiFields, copyTo);
assert mappedFieldType.getTextSearchInfo().isTokenized();
@@ -878,8 +800,8 @@ protected TextFieldMapper(String simpleName, FieldType fieldType,
throw new IllegalArgumentException("Cannot enable fielddata on a [text] field that is not indexed: [" + name() + "]");
}
this.fieldType = fieldType;
this.prefixFieldMapper = prefixFieldMapper;
this.phraseFieldMapper = phraseFieldMapper;
this.prefixFieldInfo = prefixFieldInfo;
this.phraseFieldInfo = phraseFieldInfo;
this.builder = builder;
}

@@ -907,30 +829,15 @@ protected void parseCreateField(ParseContext context) throws IOException {
if (fieldType.omitNorms()) {
createFieldNamesField(context);
}
if (prefixFieldMapper != null) {
prefixFieldMapper.addField(context, value);
if (prefixFieldInfo != null) {
context.doc().add(new Field(prefixFieldInfo.field, value, prefixFieldInfo.fieldType));
}
if (phraseFieldMapper != null) {
context.doc().add(new Field(phraseFieldMapper.fieldType().name(), value, phraseFieldMapper.fieldType));
if (phraseFieldInfo != null) {
context.doc().add(new Field(phraseFieldInfo.field, value, phraseFieldInfo.fieldType));
}
}
}

@Override
public Iterator<Mapper> iterator() {
List<Mapper> subIterators = new ArrayList<>();
if (prefixFieldMapper != null) {
subIterators.add(prefixFieldMapper);
}
if (phraseFieldMapper != null) {
subIterators.add(phraseFieldMapper);
}
if (subIterators.size() == 0) {
return super.iterator();
}
return Iterators.concat(super.iterator(), subIterators.iterator());
}

@Override
protected String contentType() {
return CONTENT_TYPE;
@@ -1014,10 +921,11 @@ public static Query createPhrasePrefixQuery(TokenStream stream, String field, in
}

if (terms.length == 1) {
Term[] newTerms = Arrays.stream(terms[0])
SynonymQuery.Builder sb = new SynonymQuery.Builder(prefixField);
Arrays.stream(terms[0])
.map(term -> new Term(prefixField, term.bytes()))
.toArray(Term[]::new);
return new SynonymQuery(newTerms);
.forEach(sb::addTerm);
return sb.build();
}

SpanNearQuery.Builder spanQuery = new SpanNearQuery.Builder(field, true);
Loading

0 comments on commit cea93d1

Please sign in to comment.