Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use ValueFetcher when loading text snippets to highlight #63572

Merged
merged 10 commits into from
Nov 24, 2020
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;

import java.io.IOException;
import java.util.ArrayList;
Expand All @@ -41,12 +41,12 @@ public class AnnotatedTextHighlighter extends UnifiedHighlighter {
@Override
protected List<Object> loadFieldValues(
CustomUnifiedHighlighter highlighter,
QueryShardContext qsc,
MappedFieldType fieldType,
Field field,
HitContext hitContext,
boolean forceSource
) throws IOException {
List<Object> fieldValues = super.loadFieldValues(highlighter, fieldType, field, hitContext, forceSource);
List<Object> fieldValues = super.loadFieldValues(highlighter, qsc, fieldType, hitContext, forceSource);

List<Object> strings = new ArrayList<>(fieldValues.size());
AnnotatedText[] annotations = new AnnotatedText[fieldValues.size()];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
noMatchSize,
expectedPassages.length,
name -> "text".equals(name),
Integer.MAX_VALUE,
Integer.MAX_VALUE
);
highlighter.setFieldMatcher((name) -> "text".equals(name));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2720,6 +2720,34 @@ public void testKeywordFieldHighlighting() throws IOException {
equalTo("<em>some text</em>"));
}

public void testCopyToFields() throws Exception {
XContentBuilder b = jsonBuilder().startObject().startObject("properties");
b.startObject("foo");
{
b.field("type", "text");
b.field("copy_to", "foo_copy");
}
b.endObject();
b.startObject("foo_copy").field("type", "text").endObject();
b.endObject().endObject();
prepareCreate("test").setMapping(b).get();

client().prepareIndex("test").setId("1")
.setSource(jsonBuilder().startObject().field("foo", "how now brown cow").endObject())
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
.get();

SearchResponse response = client().prepareSearch()
.setQuery(matchQuery("foo_copy", "brown"))
.highlighter(new HighlightBuilder().field(new Field("foo_copy")))
.get();

assertHitCount(response, 1);
HighlightField field = response.getHits().getAt(0).getHighlightFields().get("foo_copy");
assertThat(field.getFragments().length, equalTo(1));
assertThat(field.getFragments()[0].string(), equalTo("how now <em>brown</em> cow"));
}

public void testACopyFieldWithNestedQuery() throws Exception {
String mapping = Strings.toString(jsonBuilder().startObject().startObject("properties")
.startObject("foo")
Expand Down Expand Up @@ -2931,7 +2959,7 @@ public void testWithNormalizer() throws Exception {
assertHitCount(searchResponse, 1);
HighlightField field = searchResponse.getHits().getAt(0).getHighlightFields().get("keyword");
assertThat(field.getFragments().length, equalTo(1));
assertThat(field.getFragments()[0].string(), equalTo("<em>Hello World</em>"));
assertThat(field.getFragments()[0].string(), equalTo("<em>hello world</em>"));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
private final Locale breakIteratorLocale;
private final int noMatchSize;
private final FieldHighlighter fieldHighlighter;
private final int keywordIgnoreAbove;
private final int maxAnalyzedOffset;

/**
Expand All @@ -84,7 +83,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter {
* @param noMatchSize The size of the text that should be returned when no highlighting can be performed.
* @param maxPassages the maximum number of passes to highlight
* @param fieldMatcher decides which terms should be highlighted
* @param keywordIgnoreAbove if the field's value is longer than this we'll skip it
* @param maxAnalyzedOffset if the field is more than this long we'll refuse to use the ANALYZED
* offset source for it because it'd be super slow
*/
Expand All @@ -98,7 +96,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
int noMatchSize,
int maxPassages,
Predicate<String> fieldMatcher,
int keywordIgnoreAbove,
int maxAnalyzedOffset) throws IOException {
super(searcher, analyzer);
this.offsetSource = offsetSource;
Expand All @@ -109,7 +106,6 @@ public CustomUnifiedHighlighter(IndexSearcher searcher,
this.field = field;
this.noMatchSize = noMatchSize;
this.setFieldMatcher(fieldMatcher);
this.keywordIgnoreAbove = keywordIgnoreAbove;
this.maxAnalyzedOffset = maxAnalyzedOffset;
fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages);
}
Expand All @@ -127,9 +123,6 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier<St
return null;
}
int fieldValueLength = fieldValue.length();
if (fieldValueLength > keywordIgnoreAbove) {
return null; // skip highlighting keyword terms that were ignored during indexing
}
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length of ["
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public List<Object> fetchValues(SourceLookup lookup) {
for (String path : sourcePaths) {
Object sourceValue = lookup.extractValue(path, nullValue);
if (sourceValue == null) {
return List.of();
continue;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was a bug - if the first of multiple source paths didn't have any values, we returned early instead of continuing to check further paths.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for catching this! Maybe we could fix this in a separate PR (with a quick test) to keep this one scoped to highlighting?

}

// We allow source values to contain multiple levels of arrays, such as `"field": [[1, 2]]`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.ValueFetcher;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

import static java.util.Collections.singleton;

Expand All @@ -46,24 +48,17 @@ private HighlightUtils() {
* Load field values for highlighting.
*/
public static List<Object> loadFieldValues(MappedFieldType fieldType,
QueryShardContext qsc,
FetchSubPhase.HitContext hitContext,
boolean forceSource) throws IOException {
//percolator needs to always load from source, thus it sets the global force source to true
List<Object> textsToHighlight;
if (forceSource == false && fieldType.isStored()) {
romseygeek marked this conversation as resolved.
Show resolved Hide resolved
CustomFieldsVisitor fieldVisitor = new CustomFieldsVisitor(singleton(fieldType.name()), false);
hitContext.reader().document(hitContext.docId(), fieldVisitor);
textsToHighlight = fieldVisitor.fields().get(fieldType.name());
if (textsToHighlight == null) {
// Can happen if the document doesn't have the field to highlight
textsToHighlight = Collections.emptyList();
}
} else {
SourceLookup sourceLookup = hitContext.sourceLookup();
textsToHighlight = sourceLookup.extractRawValues(fieldType.name());
List<Object> textsToHighlight = fieldVisitor.fields().get(fieldType.name());
return Objects.requireNonNullElse(textsToHighlight, Collections.emptyList());
}
assert textsToHighlight != null;
return textsToHighlight;
ValueFetcher fetcher = fieldType.valueFetcher(qsc, null);
return fetcher.fetchValues(hitContext.sourceLookup());
}

public static class Encoders {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
Expand Down Expand Up @@ -101,20 +100,14 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
ArrayList<TextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
Analyzer analyzer = context.getQueryShardContext().getFieldNameIndexAnalyzer();
Integer keywordIgnoreAbove = null;
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
keywordIgnoreAbove = ((KeywordFieldMapper.KeywordFieldType)fieldType).ignoreAbove();
}
final int maxAnalyzedOffset = context.getQueryShardContext().getIndexSettings().getHighlightMaxAnalyzedOffset();

textsToHighlight = HighlightUtils.loadFieldValues(fieldType, hitContext, fieldContext.forceSource);
textsToHighlight
= HighlightUtils.loadFieldValues(fieldType, context.getQueryShardContext(), hitContext, fieldContext.forceSource);

for (Object textToHighlight : textsToHighlight) {
String text = convertFieldValue(fieldType, textToHighlight);
int textLength = text.length();
if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) {
continue; // skip highlighting keyword terms that were ignored during indexing
}
if (textLength > maxAnalyzedOffset) {
throw new IllegalArgumentException(
"The length of [" + fieldContext.fieldName + "] field of [" + hitContext.hit().getId() +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;

Expand Down Expand Up @@ -72,7 +72,8 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
FetchSubPhase.HitContext hitContext = fieldContext.hitContext;

CheckedSupplier<String, IOException> loadFieldValues = () -> {
List<Object> fieldValues = loadFieldValues(highlighter, fieldType, field, hitContext, fieldContext.forceSource);
List<Object> fieldValues = loadFieldValues(highlighter, fieldContext.context.getQueryShardContext(), fieldType,
hitContext, fieldContext.forceSource);
if (fieldValues.size() == 0) {
return null;
}
Expand Down Expand Up @@ -111,10 +112,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th
? HighlightUtils.Encoders.HTML
: HighlightUtils.Encoders.DEFAULT;
int maxAnalyzedOffset = fieldContext.context.getQueryShardContext().getIndexSettings().getHighlightMaxAnalyzedOffset();
int keywordIgnoreAbove = Integer.MAX_VALUE;
if (fieldContext.fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
keywordIgnoreAbove = ((KeywordFieldMapper.KeywordFieldType)fieldContext.fieldType).ignoreAbove();
}
int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments();
Analyzer analyzer = wrapAnalyzer(fieldContext.context.getQueryShardContext().getFieldNameIndexAnalyzer());
PassageFormatter passageFormatter = getPassageFormatter(fieldContext.hitContext, fieldContext.field, encoder);
Expand Down Expand Up @@ -151,7 +148,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th
fieldContext.field.fieldOptions().noMatchSize(),
higlighterNumberOfFragments,
fieldMatcher(fieldContext),
keywordIgnoreAbove,
maxAnalyzedOffset
);
}
Expand All @@ -167,12 +163,12 @@ protected Analyzer wrapAnalyzer(Analyzer analyzer) {

protected List<Object> loadFieldValues(
CustomUnifiedHighlighter highlighter,
QueryShardContext qsc,
MappedFieldType fieldType,
SearchHighlightContext.Field field,
FetchSubPhase.HitContext hitContext,
boolean forceSource
) throws IOException {
List<Object> fieldValues = HighlightUtils.loadFieldValues(fieldType, hitContext, forceSource);
List<Object> fieldValues = HighlightUtils.loadFieldValues(fieldType, qsc, hitContext, forceSource);
fieldValues = fieldValues.stream()
.map((s) -> convertFieldValue(fieldType, s))
.collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ private void assertHighlightOneDoc(String fieldName, String[] inputs, Analyzer a
noMatchSize,
expectedPassages.length,
name -> "text".equals(name),
Integer.MAX_VALUE,
Integer.MAX_VALUE
);
final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
Expand Down