Skip to content

Commit

Permalink
Simplify handling of keyword field normalizers (elastic#42002)
Browse files Browse the repository at this point in the history
We have a number of places in the analysis-handling code where we check
whether a field type is a keyword field and, if so, extract the normalizer rather
than pulling the index-time analyzer. However, a keyword normalizer is
really just a special case of an analyzer, so we should be able to simplify this
by setting the normalizer as the index-time analyzer at construction time.
  • Loading branch information
romseygeek authored May 10, 2019
1 parent c1d31f6 commit 6d95386
Show file tree
Hide file tree
Showing 8 changed files with 13 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ public class AnnotatedTextHighlighter extends UnifiedHighlighter {
public static final String NAME = "annotated";

@Override
protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type, hitContext), hitContext);
protected Analyzer getAnalyzer(DocumentMapper docMapper, HitContext hitContext) {
return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, hitContext), hitContext);
}

// Convert the marked-up values held on-disk to plain-text versions for highlighting
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.NormalizingCharFilterFactory;
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
Expand Down Expand Up @@ -141,14 +141,8 @@ protected AnalyzeResponse shardOperation(AnalyzeRequest request, ShardId shardId
}
MappedFieldType fieldType = indexService.mapperService().fullName(request.field());
if (fieldType != null) {
if (fieldType.tokenized()) {
if (fieldType.tokenized() || fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
analyzer = fieldType.indexAnalyzer();
} else if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
analyzer = ((KeywordFieldMapper.KeywordFieldType) fieldType).normalizer();
if (analyzer == null) {
// this will be KeywordAnalyzer
analyzer = fieldType.indexAnalyzer();
}
} else {
throw new IllegalArgumentException("Can't process field [" + request.field() +
"], Analysis requests are only supported on tokenized fields");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,13 +235,14 @@ public String typeName() {
return CONTENT_TYPE;
}

public NamedAnalyzer normalizer() {
private NamedAnalyzer normalizer() {
return normalizer;
}

public void setNormalizer(NamedAnalyzer normalizer) {
checkIfFrozen();
this.normalizer = normalizer;
setIndexAnalyzer(normalizer);
}

public boolean splitQueriesOnWhitespace() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
import org.elasticsearch.index.get.GetResult;
import org.elasticsearch.index.mapper.DocumentMapperForType;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParseContext;
Expand Down Expand Up @@ -235,12 +234,7 @@ private static Analyzer getAnalyzerAtField(IndexShard indexShard, String field,
analyzer = mapperService.getIndexAnalyzers().get(perFieldAnalyzer.get(field).toString());
} else {
MappedFieldType fieldType = mapperService.fullName(field);
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
KeywordFieldMapper.KeywordFieldType keywordFieldType = (KeywordFieldMapper.KeywordFieldType) fieldType;
analyzer = keywordFieldType.normalizer() == null ? keywordFieldType.indexAnalyzer() : keywordFieldType.normalizer();
} else {
analyzer = fieldType.indexAnalyzer();
}
analyzer = fieldType.indexAnalyzer();
}
if (analyzer == null) {
analyzer = mapperService.getIndexAnalyzers().getDefaultIndexAnalyzer();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,10 @@
*/
package org.elasticsearch.search.fetch.subphase.highlight;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.SearchContext;
Expand Down Expand Up @@ -78,13 +75,4 @@ public static class Encoders {
public static final Encoder HTML = new SimpleHTMLEncoder();
}

static Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
if (type instanceof KeywordFieldMapper.KeywordFieldType) {
KeywordFieldMapper.KeywordFieldType keywordFieldType = (KeywordFieldMapper.KeywordFieldType) type;
if (keywordFieldType.normalizer() != null) {
return keywordFieldType.normalizer();
}
}
return docMapper.mappers().indexAnalyzer();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments();
ArrayList<TextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
Analyzer analyzer = HighlightUtils.getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType);
Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
final int maxAnalyzedOffset = context.indexShard().indexSettings().getHighlightMaxAnalyzedOffset();

try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
int numberOfFragments;
try {

final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType,
final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()),
hitContext);
List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext);
if (fieldValues.size() == 0) {
Expand Down Expand Up @@ -150,8 +150,8 @@ protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchCont
}


protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
return HighlightUtils.getAnalyzer(docMapper, type);
protected Analyzer getAnalyzer(DocumentMapper docMapper, HitContext hitContext) {
return docMapper.mappers().indexAnalyzer();
}

protected List<Object> loadFieldValues(MappedFieldType fieldType, SearchContextHighlight.Field field, SearchContext context,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,8 @@ public void testUpdateNormalizer() throws IOException {
() -> indexService.mapperService().merge("type",
new CompressedXContent(mapping2), MergeReason.MAPPING_UPDATE));
assertEquals(
"Mapper for [field] conflicts with existing mapping:\n[mapper [field] has different [normalizer]]",
"Mapper for [field] conflicts with existing mapping:\n" +
"[mapper [field] has different [analyzer], mapper [field] has different [normalizer]]",
e.getMessage());
}

Expand Down

0 comments on commit 6d95386

Please sign in to comment.