Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug where fvh fragments could be loaded from wrong doc #65641

Merged
merged 6 commits into from
Dec 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,23 +1,42 @@
setup:
- do:
indices.create:
index: test
body:
mappings:
"properties":
"title":
"type": "text"
"term_vector": "with_positions_offsets"
"description":
"type": "text"
"term_vector": "with_positions_offsets"
index: test
body:
mappings:
"properties":
"id":
"type": "integer"
"title":
"type": "text"
"term_vector": "with_positions_offsets"
"description":
"type": "text"
"term_vector": "with_positions_offsets"
"nested":
"type": "nested"
"properties":
"title":
"type": "text"
"term_vector": "with_positions_offsets"
- do:
index:
index: test
body:
id: 1
"title" : "The quick brown fox is brown"
"description" : "The quick pink panther is pink"

- do:
index:
index: test
id: 1
body:
"title" : "The quick brown fox is brown"
"description" : "The quick pink panther is pink"
id: 2
"title" : "The quick blue fox is blue"
"nested":
- "title": "purple octopus"
- "title": "purple fish"

- do:
indices.refresh: {}

Expand All @@ -27,19 +46,69 @@ setup:
search:
rest_total_hits_as_int: true
body:
highlight:
type: fvh
fields:
description:
type: fvh
highlight_query:
prefix:
description: br
title:
type: fvh
highlight_query:
prefix:
title: br
highlight:
type: fvh
fields:
description:
type: fvh
highlight_query:
prefix:
description: br
title:
type: fvh
highlight_query:
prefix:
title: br

- match: {hits.hits.0.highlight.title.0: "The quick <em>brown</em> fox is <em>brown</em>"}
- is_false: hits.hits.0.highlight.description

---
"Highlight multiple documents":
- skip:
version: " - 7.99.99"
reason: Bug fix not yet backported
- do:
search:
rest_total_hits_as_int: true
body:
query:
match:
title: fox
sort: ["id"]
highlight:
type: fvh
fields:
title:
type: fvh

- match: {hits.hits.0.highlight.title.0: "The quick brown <em>fox</em> is brown"}
- is_false: hits.hits.0.highlight.description
- match: {hits.hits.1.highlight.title.0: "The quick blue <em>fox</em> is blue"}
- is_false: hits.hits.1.highlight.description

---
"Highlight multiple nested documents":
- skip:
version: " - 7.99.99"
reason: Bug fix not yet backported
- do:
search:
rest_total_hits_as_int: true
body:
query:
nested:
path: nested
query:
match:
nested.title: purple
inner_hits:
name: nested_hits
highlight:
type: fvh
fields:
nested.title:
type: fvh

- match: {hits.hits.0.inner_hits.nested_hits.hits.hits.0.highlight.nested\\.title.0: "<em>purple</em> octopus"}
- match: {hits.hits.0.inner_hits.nested_hits.hits.hits.1.highlight.nested\\.title.0: "<em>purple</em> fish"}
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,15 @@
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.Field;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext.FieldOptions;
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.text.BreakIterator;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;

public class FastVectorHighlighter implements Highlighter {
private static final BoundaryScanner DEFAULT_SIMPLE_BOUNDARY_SCANNER = new SimpleBoundaryScanner();
Expand Down Expand Up @@ -89,42 +91,16 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
FieldHighlightEntry entry = cache.fields.get(fieldType);
if (entry == null) {
FragListBuilder fragListBuilder;
BaseFragmentsBuilder fragmentsBuilder;

final BoundaryScanner boundaryScanner = getBoundaryScanner(field);
if (field.fieldOptions().numberOfFragments() == 0) {
fragListBuilder = new SingleFragListBuilder();

if (!forceSource && fieldType.isStored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, fixBrokenAnalysis, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder = new SourceSimpleFragmentsBuilder(fieldType, fixBrokenAnalysis, hitContext.sourceLookup(),
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
} else {
fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ?
new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset());
if (field.fieldOptions().scoreOrdered()) {
if (!forceSource && fieldType.isStored()) {
fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(fieldType, fixBrokenAnalysis, hitContext.sourceLookup(),
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
} else {
if (!forceSource && fieldType.isStored()) {
fragmentsBuilder = new SimpleFragmentsBuilder(fieldType, fixBrokenAnalysis, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), boundaryScanner);
} else {
fragmentsBuilder =
new SourceSimpleFragmentsBuilder(fieldType, fixBrokenAnalysis, hitContext.sourceLookup(),
field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
}
}
}
fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);

Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier = fragmentsBuilderSupplier(
field, fieldType, forceSource, fixBrokenAnalysis);

entry = new FieldHighlightEntry();
if (field.fieldOptions().requireFieldMatch()) {
/*
Expand All @@ -142,7 +118,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
}
entry.fragListBuilder = fragListBuilder;
entry.fragmentsBuilder = fragmentsBuilder;
entry.fragmentsBuilderSupplier = fragmentsBuilderSupplier;
if (cache.fvh == null) {
// parameters to FVH are not requires since:
// first two booleans are not relevant since they are set on the CustomFieldQuery
Expand All @@ -161,6 +137,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());

String[] fragments;
FragmentsBuilder fragmentsBuilder = entry.fragmentsBuilderSupplier.apply(hitContext.sourceLookup());

// a HACK to make highlighter do highlighting, even though its using the single frag list builder
int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ?
Expand All @@ -172,12 +149,12 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) {
fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
fieldType.name(), field.fieldOptions().matchedFields(), fragmentCharSize,
numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(),
numberOfFragments, entry.fragListBuilder, fragmentsBuilder, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), encoder);
} else {
fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(),
fieldType.name(), fragmentCharSize, numberOfFragments, entry.fragListBuilder,
entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
}

if (CollectionUtils.isEmpty(fragments) == false) {
Expand All @@ -190,7 +167,7 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
// the normal fragmentsBuilder
FieldFragList fieldFragList = new SimpleFieldFragList(-1 /*ignored*/);
fieldFragList.add(0, noMatchSize, Collections.emptyList());
fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(),
fragments = fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(),
fieldType.name(), fieldFragList, 1, field.fieldOptions().preTags(),
field.fieldOptions().postTags(), encoder);
if (CollectionUtils.isEmpty(fragments) == false) {
Expand All @@ -201,6 +178,37 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc
return null;
}

private Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier(SearchHighlightContext.Field field,
MappedFieldType fieldType,
boolean forceSource,
boolean fixBrokenAnalysis) {
BoundaryScanner boundaryScanner = getBoundaryScanner(field);
FieldOptions options = field.fieldOptions();
Function<SourceLookup, BaseFragmentsBuilder> supplier;
if (!forceSource && fieldType.isStored()) {
if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
supplier = ignored -> new ScoreOrderFragmentsBuilder(options.preTags(), options.postTags(), boundaryScanner);
} else {
supplier = ignored -> new SimpleFragmentsBuilder(fieldType, fixBrokenAnalysis,
options.preTags(), options.postTags(), boundaryScanner);
}
} else {
if (options.numberOfFragments() != 0 && options.scoreOrdered()) {
supplier = lookup -> new SourceScoreOrderFragmentsBuilder(fieldType, fixBrokenAnalysis, lookup,
options.preTags(), options.postTags(), boundaryScanner);
} else {
supplier = lookup -> new SourceSimpleFragmentsBuilder(fieldType, fixBrokenAnalysis, lookup,
options.preTags(), options.postTags(), boundaryScanner);
}
}

return lookup -> {
BaseFragmentsBuilder builder = supplier.apply(lookup);
builder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
return builder;
};
}

@Override
public boolean canHighlight(MappedFieldType ft) {
return ft.getTextSearchInfo().termVectors() == TextSearchInfo.TermVector.OFFSETS;
Expand Down Expand Up @@ -238,7 +246,7 @@ private static BoundaryScanner getBoundaryScanner(Field field) {

private static class FieldHighlightEntry {
public FragListBuilder fragListBuilder;
public FragmentsBuilder fragmentsBuilder;
public Function<SourceLookup, FragmentsBuilder> fragmentsBuilderSupplier;
public FieldQuery noFieldMatchFieldQuery;
public FieldQuery fieldMatchFieldQuery;
}
Expand Down