Skip to content

Commit

Permalink
Move index analyzer management to FieldMapper/MapperService (elastic#…
Browse files Browse the repository at this point in the history
…63937)

Index-time analyzers are currently specified on the MappedFieldType. This
has a number of unfortunate consequences; for example, field mappers that
index data into implementation sub-fields, such as prefix or phrase
accelerators on text fields, need to expose these sub-fields as MappedFieldTypes,
which means that they then appear in field caps, are externally searchable,
etc. It also adds index-time logic to a class that should only be concerned
with search-time behaviour.

This commit removes references to the index analyzer from MappedFieldType.
Instead, FieldMappers that use the terms index can pass either a single analyzer
or a Map of fields to analyzers to their super constructor, which are then
exposed via a new FieldMapper#indexAnalyzers() method; all index-time analysis
is mediated through the delegating analyzer wrapper on MapperService.
In a follow-up, this will make it possible to register multiple field analyzers from
a single FieldMapper, removing the need for 'hidden' mapper implementations
on text field, parent joins, and elsewhere.
  • Loading branch information
romseygeek committed Nov 4, 2020
1 parent 1091d1b commit 7a444df
Show file tree
Hide file tree
Showing 53 changed files with 457 additions and 381 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ public static final class RankFeatureFieldType extends MappedFieldType {
public RankFeatureFieldType(String name, Map<String, String> meta, boolean positiveScoreImpact) {
super(name, true, false, false, TextSearchInfo.NONE, meta);
this.positiveScoreImpact = positiveScoreImpact;
setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
}

@Override
Expand Down Expand Up @@ -136,7 +135,7 @@ public Query termQuery(Object value, QueryShardContext context) {

private RankFeatureFieldMapper(String simpleName, MappedFieldType mappedFieldType,
MultiFields multiFields, CopyTo copyTo, boolean positiveScoreImpact) {
super(simpleName, mappedFieldType, multiFields, copyTo);
super(simpleName, mappedFieldType, Lucene.KEYWORD_ANALYZER, multiFields, copyTo);
this.positiveScoreImpact = positiveScoreImpact;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ public static final class RankFeaturesFieldType extends MappedFieldType {

public RankFeaturesFieldType(String name, Map<String, String> meta) {
super(name, false, false, false, TextSearchInfo.NONE, meta);
setIndexAnalyzer(Lucene.KEYWORD_ANALYZER);
}

@Override
Expand Down Expand Up @@ -99,7 +98,7 @@ public Query termQuery(Object value, QueryShardContext context) {

private RankFeaturesFieldMapper(String simpleName, MappedFieldType mappedFieldType,
MultiFields multiFields, CopyTo copyTo) {
super(simpleName, mappedFieldType, multiFields, copyTo);
super(simpleName, mappedFieldType, Lucene.KEYWORD_ANALYZER, multiFields, copyTo);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -95,18 +96,14 @@ public static class Defaults {
public static final TypeParser PARSER
= new TypeParser((n, c) -> new Builder(n, () -> c.getIndexAnalyzers().getDefaultIndexAnalyzer()));

private static SearchAsYouTypeFieldMapper toType(FieldMapper in) {
return (SearchAsYouTypeFieldMapper) in;
}

private static SearchAsYouTypeFieldType ft(FieldMapper in) {
return toType(in).fieldType();
private static Builder builder(FieldMapper in) {
return ((SearchAsYouTypeFieldMapper)in).builder;
}

public static class Builder extends FieldMapper.Builder {

private final Parameter<Boolean> index = Parameter.indexParam(m -> toType(m).index, true);
private final Parameter<Boolean> store = Parameter.storeParam(m -> toType(m).store, false);
private final Parameter<Boolean> index = Parameter.indexParam(m -> builder(m).index.get(), true);
private final Parameter<Boolean> store = Parameter.storeParam(m -> builder(m).store.get(), false);

// This is only here because for some reason the initial impl of this always serialized
// `doc_values=false`, even though it cannot be set; and so we need to continue
Expand All @@ -120,7 +117,7 @@ public static class Builder extends FieldMapper.Builder {
.alwaysSerialize();

private final Parameter<Integer> maxShingleSize = Parameter.intParam("max_shingle_size", false,
m -> toType(m).maxShingleSize, Defaults.MAX_SHINGLE_SIZE)
m -> builder(m).maxShingleSize.get(), Defaults.MAX_SHINGLE_SIZE)
.setValidator(v -> {
if (v < MAX_SHINGLE_SIZE_LOWER_BOUND || v > MAX_SHINGLE_SIZE_UPPER_BOUND) {
throw new MapperParsingException("[max_shingle_size] must be at least [" + MAX_SHINGLE_SIZE_LOWER_BOUND
Expand All @@ -130,17 +127,17 @@ public static class Builder extends FieldMapper.Builder {
.alwaysSerialize();

final TextParams.Analyzers analyzers;
final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> ft(m).getTextSearchInfo().getSimilarity());
final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> builder(m).similarity.get());

final Parameter<String> indexOptions = TextParams.indexOptions(m -> toType(m).indexOptions);
final Parameter<Boolean> norms = TextParams.norms(true, m -> ft(m).getTextSearchInfo().hasNorms());
final Parameter<String> termVectors = TextParams.termVectors(m -> toType(m).termVectors);
final Parameter<String> indexOptions = TextParams.indexOptions(m -> builder(m).indexOptions.get());
final Parameter<Boolean> norms = TextParams.norms(true, m -> builder(m).norms.get());
final Parameter<String> termVectors = TextParams.termVectors(m -> builder(m).termVectors.get());

private final Parameter<Map<String, String>> meta = Parameter.metaParam();

public Builder(String name, Supplier<NamedAnalyzer> defaultAnalyzer) {
super(name);
this.analyzers = new TextParams.Analyzers(defaultAnalyzer);
this.analyzers = new TextParams.Analyzers(defaultAnalyzer, m -> builder(m).analyzers);
}

@Override
Expand All @@ -159,12 +156,15 @@ public SearchAsYouTypeFieldMapper build(Mapper.BuilderContext context) {
fieldType.setStored(store.getValue());
TextParams.setTermVectorParams(termVectors.getValue(), fieldType);

Map<String, NamedAnalyzer> indexAnalyzers = new HashMap<>();

NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();

SearchAsYouTypeFieldType ft = new SearchAsYouTypeFieldType(buildFullName(context), fieldType, similarity.getValue(),
analyzers.getSearchAnalyzer(), analyzers.getSearchQuoteAnalyzer(), meta.getValue());
ft.setIndexAnalyzer(analyzers.getIndexAnalyzer());

indexAnalyzers.put(ft.name(), indexAnalyzer);

// set up the prefix field
FieldType prefixft = new FieldType(fieldType);
Expand All @@ -182,8 +182,10 @@ public SearchAsYouTypeFieldMapper build(Mapper.BuilderContext context) {
TextSearchInfo prefixSearchInfo = new TextSearchInfo(prefixft, similarity.getValue(), prefixSearchWrapper, searchAnalyzer);
final PrefixFieldType prefixFieldType
= new PrefixFieldType(fullName, prefixSearchInfo, Defaults.MIN_GRAM, Defaults.MAX_GRAM);
prefixFieldType.setIndexAnalyzer(new NamedAnalyzer(indexAnalyzer.name(), AnalyzerScope.INDEX, prefixIndexWrapper));
final NamedAnalyzer prefixAnalyzer
= new NamedAnalyzer(indexAnalyzer.name(), AnalyzerScope.INDEX, prefixIndexWrapper);
final PrefixFieldMapper prefixFieldMapper = new PrefixFieldMapper(prefixft, prefixFieldType);
indexAnalyzers.put(prefixFieldType.name(), prefixAnalyzer);

// set up the shingle fields
final ShingleFieldMapper[] shingleFieldMappers = new ShingleFieldMapper[maxShingleSize.getValue() - 1];
Expand All @@ -203,14 +205,16 @@ public SearchAsYouTypeFieldMapper build(Mapper.BuilderContext context) {
TextSearchInfo textSearchInfo
= new TextSearchInfo(shingleft, similarity.getValue(), shingleSearchWrapper, shingleSearchQuoteWrapper);
final ShingleFieldType shingleFieldType = new ShingleFieldType(fieldName, shingleSize, textSearchInfo);
shingleFieldType.setIndexAnalyzer(new NamedAnalyzer(indexAnalyzer.name(), AnalyzerScope.INDEX, shingleIndexWrapper));
shingleFieldType.setPrefixFieldType(prefixFieldType);
shingleFieldTypes[i] = shingleFieldType;
NamedAnalyzer shingleAnalyzer
= new NamedAnalyzer(indexAnalyzer.name(), AnalyzerScope.INDEX, shingleIndexWrapper);
shingleFieldMappers[i] = new ShingleFieldMapper(shingleft, shingleFieldType);
indexAnalyzers.put(shingleFieldType.name(), shingleAnalyzer);
}
ft.setPrefixField(prefixFieldType);
ft.setShingleFields(shingleFieldTypes);
return new SearchAsYouTypeFieldMapper(name, ft, copyTo.build(), prefixFieldMapper, shingleFieldMappers, this);
return new SearchAsYouTypeFieldMapper(name, ft, copyTo.build(), indexAnalyzers, prefixFieldMapper, shingleFieldMappers, this);
}
}

Expand Down Expand Up @@ -546,29 +550,23 @@ public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRew
}
}

private final boolean index;
private final boolean store;
private final String indexOptions;
private final String termVectors;

private final int maxShingleSize;
private final PrefixFieldMapper prefixField;
private final ShingleFieldMapper[] shingleFields;
private final Builder builder;

public SearchAsYouTypeFieldMapper(String simpleName,
SearchAsYouTypeFieldType mappedFieldType,
CopyTo copyTo,
Map<String, NamedAnalyzer> indexAnalyzers,
PrefixFieldMapper prefixField,
ShingleFieldMapper[] shingleFields,
Builder builder) {
super(simpleName, mappedFieldType, MultiFields.empty(), copyTo);
super(simpleName, mappedFieldType, indexAnalyzers, MultiFields.empty(), copyTo);
this.prefixField = prefixField;
this.shingleFields = shingleFields;
this.maxShingleSize = builder.maxShingleSize.getValue();
this.index = builder.index.getValue();
this.store = builder.store.getValue();
this.indexOptions = builder.indexOptions.getValue();
this.termVectors = builder.termVectors.getValue();
this.builder = builder;
}

@Override
Expand All @@ -578,12 +576,12 @@ protected void parseCreateField(ParseContext context) throws IOException {
return;
}

if (this.index == false && this.store == false) {
if (this.builder.index.get() == false && this.builder.store.get() == false) {
return;
}

context.doc().add(new Field(fieldType().name(), value, fieldType().fieldType));
if (this.index) {
if (this.builder.index.get()) {
for (ShingleFieldMapper subFieldMapper : shingleFields) {
context.doc().add(new Field(subFieldMapper.fieldType().name(), value, subFieldMapper.getLuceneFieldType()));
}
Expand All @@ -599,9 +597,8 @@ protected String contentType() {
return CONTENT_TYPE;
}

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(simpleName(), () -> fieldType().indexAnalyzer()).init(this);
return new Builder(simpleName(), builder.analyzers.indexAnalyzer::getDefaultValue).init(this);
}

public static String getShingleFieldName(String parentField, int shingleSize) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
Expand Down Expand Up @@ -180,12 +181,12 @@ public void testDefaultConfiguration() throws IOException {
assertRootFieldMapper(rootMapper, 3, "default");

PrefixFieldMapper prefixFieldMapper = getPrefixFieldMapper(defaultMapper, "field._index_prefix");
assertPrefixFieldType(prefixFieldMapper.fieldType(), 3, "default");
assertPrefixFieldType(prefixFieldMapper, rootMapper.indexAnalyzers(), 3, "default");

assertShingleFieldType(
getShingleFieldMapper(defaultMapper, "field._2gram").fieldType(), 2, "default", prefixFieldMapper.fieldType());
assertShingleFieldType(
getShingleFieldMapper(defaultMapper, "field._3gram").fieldType(), 3, "default", prefixFieldMapper.fieldType());
assertShingleFieldType(getShingleFieldMapper(defaultMapper, "field._2gram"),
rootMapper.indexAnalyzers(), 2, "default", prefixFieldMapper.fieldType());
assertShingleFieldType(getShingleFieldMapper(defaultMapper, "field._3gram"),
rootMapper.indexAnalyzers(), 3, "default", prefixFieldMapper.fieldType());
}

public void testConfiguration() throws IOException {
Expand All @@ -201,14 +202,14 @@ public void testConfiguration() throws IOException {
assertRootFieldMapper(rootMapper, maxShingleSize, analyzerName);

PrefixFieldMapper prefixFieldMapper = getPrefixFieldMapper(defaultMapper, "field._index_prefix");
assertPrefixFieldType(prefixFieldMapper.fieldType(), maxShingleSize, analyzerName);
assertPrefixFieldType(prefixFieldMapper, rootMapper.indexAnalyzers(), maxShingleSize, analyzerName);

assertShingleFieldType(
getShingleFieldMapper(defaultMapper, "field._2gram").fieldType(), 2, analyzerName, prefixFieldMapper.fieldType());
assertShingleFieldType(
getShingleFieldMapper(defaultMapper, "field._3gram").fieldType(), 3, analyzerName, prefixFieldMapper.fieldType());
assertShingleFieldType(
getShingleFieldMapper(defaultMapper, "field._4gram").fieldType(), 4, analyzerName, prefixFieldMapper.fieldType());
assertShingleFieldType(getShingleFieldMapper(defaultMapper, "field._2gram"),
rootMapper.indexAnalyzers(), 2, analyzerName, prefixFieldMapper.fieldType());
assertShingleFieldType(getShingleFieldMapper(defaultMapper, "field._3gram"),
rootMapper.indexAnalyzers(), 3, analyzerName, prefixFieldMapper.fieldType());
assertShingleFieldType(getShingleFieldMapper(defaultMapper, "field._4gram"),
rootMapper.indexAnalyzers(), 4, analyzerName, prefixFieldMapper.fieldType());
}

public void testSimpleMerge() throws IOException {
Expand Down Expand Up @@ -600,47 +601,53 @@ private static void assertRootFieldMapper(SearchAsYouTypeFieldMapper mapper,

assertThat(mapper.maxShingleSize(), equalTo(maxShingleSize));
assertThat(mapper.fieldType(), notNullValue());
assertSearchAsYouTypeFieldType(mapper.fieldType(), maxShingleSize, analyzerName, mapper.prefixField().fieldType());
assertSearchAsYouTypeFieldType(mapper, mapper.fieldType(), maxShingleSize, analyzerName, mapper.prefixField().fieldType());

assertThat(mapper.prefixField(), notNullValue());
assertThat(mapper.prefixField().fieldType().parentField, equalTo(mapper.name()));
assertPrefixFieldType(mapper.prefixField().fieldType(), maxShingleSize, analyzerName);
assertPrefixFieldType(mapper.prefixField(), mapper.indexAnalyzers, maxShingleSize, analyzerName);


for (int shingleSize = 2; shingleSize <= maxShingleSize; shingleSize++) {
final ShingleFieldMapper shingleFieldMapper = mapper.shingleFields()[shingleSize - 2];
assertThat(shingleFieldMapper, notNullValue());
assertShingleFieldType(shingleFieldMapper.fieldType(), shingleSize, analyzerName, mapper.prefixField().fieldType());
assertShingleFieldType(shingleFieldMapper, mapper.indexAnalyzers, shingleSize,
analyzerName, mapper.prefixField().fieldType());
}

final int numberOfShingleSubfields = (maxShingleSize - 2) + 1;
assertThat(mapper.shingleFields().length, equalTo(numberOfShingleSubfields));
}

private static void assertSearchAsYouTypeFieldType(SearchAsYouTypeFieldType fieldType, int maxShingleSize,
private static void assertSearchAsYouTypeFieldType(SearchAsYouTypeFieldMapper mapper,
SearchAsYouTypeFieldType fieldType,
int maxShingleSize,
String analyzerName,
PrefixFieldType prefixFieldType) {

assertThat(fieldType.shingleFields.length, equalTo(maxShingleSize - 1));
for (NamedAnalyzer analyzer : asList(fieldType.indexAnalyzer(), fieldType.getTextSearchInfo().getSearchAnalyzer())) {
NamedAnalyzer indexAnalyzer = mapper.indexAnalyzers().get(fieldType.name());
for (NamedAnalyzer analyzer : asList(indexAnalyzer, fieldType.getTextSearchInfo().getSearchAnalyzer())) {
assertThat(analyzer.name(), equalTo(analyzerName));
}
int shingleSize = 2;
for (ShingleFieldType shingleField : fieldType.shingleFields) {
assertShingleFieldType(shingleField, shingleSize++, analyzerName, prefixFieldType);
for (ShingleFieldMapper shingleField : mapper.shingleFields()) {
assertShingleFieldType(shingleField, mapper.indexAnalyzers(), shingleSize++, analyzerName, prefixFieldType);
}

assertThat(fieldType.prefixField, equalTo(prefixFieldType));
}

private static void assertShingleFieldType(ShingleFieldType fieldType,
private static void assertShingleFieldType(ShingleFieldMapper mapper,
Map<String, NamedAnalyzer> indexAnalyzers,
int shingleSize,
String analyzerName,
PrefixFieldType prefixFieldType) {

ShingleFieldType fieldType = mapper.fieldType();
assertThat(fieldType.shingleSize, equalTo(shingleSize));

for (NamedAnalyzer analyzer : asList(fieldType.indexAnalyzer(), fieldType.getTextSearchInfo().getSearchAnalyzer())) {
for (NamedAnalyzer analyzer : asList(indexAnalyzers.get(fieldType.name()), fieldType.getTextSearchInfo().getSearchAnalyzer())) {
assertThat(analyzer.name(), equalTo(analyzerName));
if (shingleSize > 1) {
final SearchAsYouTypeAnalyzer wrappedAnalyzer = (SearchAsYouTypeAnalyzer) analyzer.analyzer();
Expand All @@ -653,12 +660,15 @@ private static void assertShingleFieldType(ShingleFieldType fieldType,

}

private static void assertPrefixFieldType(PrefixFieldType fieldType, int shingleSize, String analyzerName) {
for (NamedAnalyzer analyzer : asList(fieldType.indexAnalyzer(), fieldType.getTextSearchInfo().getSearchAnalyzer())) {
private static void assertPrefixFieldType(PrefixFieldMapper mapper, Map<String, NamedAnalyzer> indexAnalyzers,
int shingleSize, String analyzerName) {
PrefixFieldType fieldType = mapper.fieldType();
NamedAnalyzer indexAnalyzer = indexAnalyzers.get(fieldType.name());
for (NamedAnalyzer analyzer : asList(indexAnalyzer, fieldType.getTextSearchInfo().getSearchAnalyzer())) {
assertThat(analyzer.name(), equalTo(analyzerName));
}

final SearchAsYouTypeAnalyzer wrappedIndexAnalyzer = (SearchAsYouTypeAnalyzer) fieldType.indexAnalyzer().analyzer();
final SearchAsYouTypeAnalyzer wrappedIndexAnalyzer = (SearchAsYouTypeAnalyzer) indexAnalyzer.analyzer();
final SearchAsYouTypeAnalyzer wrappedSearchAnalyzer
= (SearchAsYouTypeAnalyzer) fieldType.getTextSearchInfo().getSearchAnalyzer().analyzer();
for (SearchAsYouTypeAnalyzer analyzer : asList(wrappedIndexAnalyzer, wrappedSearchAnalyzer)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ public void testPrefixQuery() {

public void testFetchSourceValue() throws IOException {
SearchAsYouTypeFieldType fieldType = createFieldType();
fieldType.setIndexAnalyzer(Lucene.STANDARD_ANALYZER);

assertEquals(org.elasticsearch.common.collect.List.of("value"), fetchSourceValue(fieldType, "value"));
assertEquals(org.elasticsearch.common.collect.List.of("42"), fetchSourceValue(fieldType, 42L));
Expand Down
Loading

0 comments on commit 7a444df

Please sign in to comment.