From 92e4b15048e5cecbb0613548d544306dee2e3720 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 25 Nov 2020 17:00:18 +0000 Subject: [PATCH] Use ValueFetcher when loading text snippets to highlight (#63572) (#65441) HighlighterUtils.loadFieldValues() loads values directly from the source, and then callers have to deal with filtering out values that would have been removed by an ignore_above filter on keyword fields. Instead, we can use the ValueFetcher for the relevant field, which handles all this logic for us. Closes #59931. --- .../reference/migration/migrate_7_11.asciidoc | 34 +++++++++++++++++++ .../highlight/AnnotatedTextHighlighter.java | 6 ++-- .../AnnotatedTextHighlighterTests.java | 1 - .../highlight/HighlighterSearchIT.java | 30 +++++++++++++++- .../uhighlight/CustomUnifiedHighlighter.java | 7 ---- .../subphase/highlight/HighlightUtils.java | 19 +++++------ .../subphase/highlight/PlainHighlighter.java | 11 ++---- .../highlight/UnifiedHighlighter.java | 14 +++----- .../CustomUnifiedHighlighterTests.java | 1 - 9 files changed, 81 insertions(+), 42 deletions(-) create mode 100644 docs/reference/migration/migrate_7_11.asciidoc diff --git a/docs/reference/migration/migrate_7_11.asciidoc b/docs/reference/migration/migrate_7_11.asciidoc new file mode 100644 index 0000000000000..20ab157435007 --- /dev/null +++ b/docs/reference/migration/migrate_7_11.asciidoc @@ -0,0 +1,34 @@ +[[breaking-changes-7.11]] +== Breaking changes in 7.11 +++++ +7.11 +++++ + +This section discusses the changes that you need to be aware of when migrating +your application to {es} 7.11. + +See also <> and <>. + +// * <> +// * <> + +//NOTE: The notable-breaking-changes tagged regions are re-used in the +//Installation and Upgrade Guide + +//tag::notable-breaking-changes[] + +[discrete] +[[breaking_711_search_changes]] +=== Search changes + +[[highlight-normalization]] +.Keyword fields with a custom normalizer will use the normalized form when highlighting +[%collapsible] +==== +*Details* + +Highlighters now use the same framework to load their values as the +`fields` section of a search response. This means that normalization +will be applied to the values of a keyword field; for example, a +field configured with a lowercase normalizer will return highlighted +snippets in lower case. +==== diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java index 301f0059448c9..c29fbce6f0663 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighter.java @@ -26,8 +26,8 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText; +import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; -import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field; import java.io.IOException; import java.util.ArrayList; @@ -41,12 +41,12 @@ public class AnnotatedTextHighlighter extends UnifiedHighlighter { @Override protected List loadFieldValues( CustomUnifiedHighlighter highlighter, + QueryShardContext qsc, MappedFieldType fieldType, - Field field, HitContext hitContext, boolean forceSource ) throws IOException { - List fieldValues = super.loadFieldValues(highlighter, fieldType, field, hitContext, forceSource); + List fieldValues = super.loadFieldValues(highlighter, qsc, fieldType, hitContext, forceSource); List strings = new ArrayList<>(fieldValues.size()); AnnotatedText[] annotations = new AnnotatedText[fieldValues.size()]; diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java index eeeb79aca3770..6b0851c7ad3f4 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/AnnotatedTextHighlighterTests.java @@ -118,7 +118,6 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs, noMatchSize, expectedPassages.length, name -> "text".equals(name), - Integer.MAX_VALUE, Integer.MAX_VALUE ); highlighter.setFieldMatcher((name) -> "text".equals(name)); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index 2a1b7ce09108c..a047e11a4b854 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -2775,6 +2775,34 @@ public void testKeywordFieldHighlighting() throws IOException { equalTo("some text")); } + public void testCopyToFields() throws Exception { + XContentBuilder b = jsonBuilder().startObject().startObject("properties"); + b.startObject("foo"); + { + b.field("type", "text"); + b.field("copy_to", "foo_copy"); + } + b.endObject(); + b.startObject("foo_copy").field("type", "text").endObject(); + b.endObject().endObject(); + prepareCreate("test").addMapping("_doc", b).get(); + + client().prepareIndex("test", "_doc").setId("1") + .setSource(jsonBuilder().startObject().field("foo", "how now brown cow").endObject()) + .setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) + .get(); + + SearchResponse response = client().prepareSearch() + .setQuery(matchQuery("foo_copy", "brown")) + .highlighter(new HighlightBuilder().field(new Field("foo_copy"))) + .get(); + + assertHitCount(response, 1); + HighlightField field = response.getHits().getAt(0).getHighlightFields().get("foo_copy"); + assertThat(field.getFragments().length, equalTo(1)); + assertThat(field.getFragments()[0].string(), equalTo("how now brown cow")); + } + public void testACopyFieldWithNestedQuery() throws Exception { String mapping = Strings.toString(jsonBuilder().startObject().startObject("type").startObject("properties") .startObject("foo") @@ -2986,7 +3014,7 @@ public void testWithNormalizer() throws Exception { assertHitCount(searchResponse, 1); HighlightField field = searchResponse.getHits().getAt(0).getHighlightFields().get("keyword"); assertThat(field.getFragments().length, equalTo(1)); - assertThat(field.getFragments()[0].string(), equalTo("Hello World")); + assertThat(field.getFragments()[0].string(), equalTo("hello world")); } } diff --git a/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java index 3a2693a2d42cd..c31cf840c8a12 100644 --- a/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/server/src/main/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -64,7 +64,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter { private final Locale breakIteratorLocale; private final int noMatchSize; private final FieldHighlighter fieldHighlighter; - private final int keywordIgnoreAbove; private final int maxAnalyzedOffset; /** @@ -84,7 +83,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter { * @param noMatchSize The size of the text that should be returned when no highlighting can be performed. * @param maxPassages the maximum number of passes to highlight * @param fieldMatcher decides which terms should be highlighted - * @param keywordIgnoreAbove if the field's value is longer than this we'll skip it * @param maxAnalyzedOffset if the field is more than this long we'll refuse to use the ANALYZED * offset source for it because it'd be super slow */ @@ -98,7 +96,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher, int noMatchSize, int maxPassages, Predicate fieldMatcher, - int keywordIgnoreAbove, int maxAnalyzedOffset) throws IOException { super(searcher, analyzer); this.offsetSource = offsetSource; @@ -109,7 +106,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher, this.field = field; this.noMatchSize = noMatchSize; this.setFieldMatcher(fieldMatcher); - this.keywordIgnoreAbove = keywordIgnoreAbove; this.maxAnalyzedOffset = maxAnalyzedOffset; fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages); } @@ -127,9 +123,6 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier keywordIgnoreAbove) { - return null; // skip highlighting keyword terms that were ignored during indexing - } if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) { throw new IllegalArgumentException( "The length of [" diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java index 49f5fd9a8c4b7..6cadca082c559 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/HighlightUtils.java @@ -23,8 +23,9 @@ import org.apache.lucene.search.highlight.SimpleHTMLEncoder; import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.ValueFetcher; +import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.search.fetch.FetchSubPhase; -import org.elasticsearch.search.lookup.SourceLookup; import java.io.IOException; import java.util.Collections; @@ -46,24 +47,20 @@ private HighlightUtils() { * Load field values for highlighting. */ public static List loadFieldValues(MappedFieldType fieldType, + QueryShardContext qsc, FetchSubPhase.HitContext hitContext, boolean forceSource) throws IOException { - //percolator needs to always load from source, thus it sets the global force source to true - List textsToHighlight; if (forceSource == false && fieldType.isStored()) { CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(singleton(fieldType.name()), false); hitContext.reader().document(hitContext.docId(), fieldVisitor); - textsToHighlight = fieldVisitor.fields().get(fieldType.name()); + List textsToHighlight = fieldVisitor.fields().get(fieldType.name()); if (textsToHighlight == null) { - // Can happen if the document doesn't have the field to highlight - textsToHighlight = Collections.emptyList(); + return Collections.emptyList(); } - } else { - SourceLookup sourceLookup = hitContext.sourceLookup(); - textsToHighlight = sourceLookup.extractRawValues(fieldType.name()); + return textsToHighlight; } - assert textsToHighlight != null; - return textsToHighlight; + ValueFetcher fetcher = fieldType.valueFetcher(qsc, null); + return fetcher.fetchValues(hitContext.sourceLookup()); } public static class Encoders { diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java index 523f7e0a018d6..c32b2c81f8d3f 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java @@ -36,7 +36,6 @@ import org.apache.lucene.util.CollectionUtil; import org.elasticsearch.common.text.Text; import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.search.fetch.FetchContext; import org.elasticsearch.search.fetch.FetchSubPhase; @@ -101,20 +100,14 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc ArrayList fragsList = new ArrayList<>(); List textsToHighlight; Analyzer analyzer = context.getQueryShardContext().getFieldNameIndexAnalyzer(); - Integer keywordIgnoreAbove = null; - if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) { - keywordIgnoreAbove = ((KeywordFieldMapper.KeywordFieldType)fieldType).ignoreAbove(); - } final int maxAnalyzedOffset = context.getQueryShardContext().getIndexSettings().getHighlightMaxAnalyzedOffset(); - textsToHighlight = HighlightUtils.loadFieldValues(fieldType, hitContext, fieldContext.forceSource); + textsToHighlight + = HighlightUtils.loadFieldValues(fieldType, context.getQueryShardContext(), hitContext, fieldContext.forceSource); for (Object textToHighlight : textsToHighlight) { String text = convertFieldValue(fieldType, textToHighlight); int textLength = text.length(); - if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) { - continue; // skip highlighting keyword terms that were ignored during indexing - } if (textLength > maxAnalyzedOffset) { throw new IllegalArgumentException( "The length of [" + fieldContext.fieldName + "] field of [" + hitContext.hit().getId() + diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java index 5f8871f72ffa0..be8d919c89571 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java @@ -34,9 +34,9 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.text.Text; import org.elasticsearch.index.mapper.IdFieldMapper; -import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.TextSearchInfo; +import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; @@ -72,7 +72,8 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc FetchSubPhase.HitContext hitContext = fieldContext.hitContext; CheckedSupplier loadFieldValues = () -> { - List fieldValues = loadFieldValues(highlighter, fieldType, field, hitContext, fieldContext.forceSource); + List fieldValues = loadFieldValues(highlighter, fieldContext.context.getQueryShardContext(), fieldType, + hitContext, fieldContext.forceSource); if (fieldValues.size() == 0) { return null; } @@ -111,10 +112,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; int maxAnalyzedOffset = fieldContext.context.getQueryShardContext().getIndexSettings().getHighlightMaxAnalyzedOffset(); - int keywordIgnoreAbove = Integer.MAX_VALUE; - if (fieldContext.fieldType instanceof KeywordFieldMapper.KeywordFieldType) { - keywordIgnoreAbove = ((KeywordFieldMapper.KeywordFieldType)fieldContext.fieldType).ignoreAbove(); - } int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments(); Analyzer analyzer = wrapAnalyzer(fieldContext.context.getQueryShardContext().getFieldNameIndexAnalyzer()); PassageFormatter passageFormatter = getPassageFormatter(fieldContext.hitContext, fieldContext.field, encoder); @@ -151,7 +148,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th fieldContext.field.fieldOptions().noMatchSize(), higlighterNumberOfFragments, fieldMatcher(fieldContext), - keywordIgnoreAbove, maxAnalyzedOffset ); } @@ -167,12 +163,12 @@ protected Analyzer wrapAnalyzer(Analyzer analyzer) { protected List loadFieldValues( CustomUnifiedHighlighter highlighter, + QueryShardContext qsc, MappedFieldType fieldType, - SearchHighlightContext.Field field, FetchSubPhase.HitContext hitContext, boolean forceSource ) throws IOException { - List fieldValues = HighlightUtils.loadFieldValues(fieldType, hitContext, forceSource); + List fieldValues = HighlightUtils.loadFieldValues(fieldType, qsc, hitContext, forceSource); fieldValues = fieldValues.stream() .map((s) -> convertFieldValue(fieldType, s)) .collect(Collectors.toList()); diff --git a/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java index 86d5de4db2efb..27b7acd5cb789 100644 --- a/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java +++ b/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java @@ -92,7 +92,6 @@ private void assertHighlightOneDoc(String fieldName, String[] inputs, Analyzer a noMatchSize, expectedPassages.length, name -> "text".equals(name), - Integer.MAX_VALUE, Integer.MAX_VALUE ); final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);