Skip to content

Commit

Permalink
FastVectorHighlighter should not cache the field query globally
Browse files Browse the repository at this point in the history
This commit removes the global caching of the field query and replaces it with
a caching per field. Each field can use a different `highlight_query` and the rewriting of
some queries (prefix, automaton, ...) depends on the targeted field so the query used for highlighting
must be unique per field.
There might be a small performance penalty when highlighting multiple fields since the query needs to be rewritten
once per highlighted field with this change.

Fixes elastic#25171
  • Loading branch information
jimczi committed Jun 13, 2017
1 parent 186c16e commit 6c519cf
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -87,29 +87,6 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
HighlighterEntry cache = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);

try {
FieldQuery fieldQuery;
if (field.fieldOptions().requireFieldMatch()) {
if (cache.fieldMatchFieldQuery == null) {
/*
* we use top level reader to rewrite the query against all readers,
* with use caching it across hits (and across readers...)
*/
cache.fieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query,
hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
}
fieldQuery = cache.fieldMatchFieldQuery;
} else {
if (cache.noFieldMatchFieldQuery == null) {
/*
* we use top level reader to rewrite the query against all readers,
* with use caching it across hits (and across readers...)
*/
cache.noFieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query,
hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
}
fieldQuery = cache.noFieldMatchFieldQuery;
}

MapperHighlightEntry entry = cache.mappers.get(mapper);
if (entry == null) {
FragListBuilder fragListBuilder;
Expand Down Expand Up @@ -151,6 +128,21 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
}
fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
entry = new MapperHighlightEntry();
if (field.fieldOptions().requireFieldMatch()) {
/**
* we use top level reader to rewrite the query against all readers,
* with use caching it across hits (and across readers...)
*/
entry.fieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query,
hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
} else {
/**
* we use top level reader to rewrite the query against all readers,
* with use caching it across hits (and across readers...)
*/
entry.noFieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query,
hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
}
entry.fragListBuilder = fragListBuilder;
entry.fragmentsBuilder = fragmentsBuilder;
if (cache.fvh == null) {
Expand All @@ -162,6 +154,12 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
CustomFieldQuery.highlightFilters.set(field.fieldOptions().highlightFilter());
cache.mappers.put(mapper, entry);
}
final FieldQuery fieldQuery;
if (field.fieldOptions().requireFieldMatch()) {
fieldQuery = entry.fieldMatchFieldQuery;
} else {
fieldQuery = entry.noFieldMatchFieldQuery;
}
cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());

String[] fragments;
Expand Down Expand Up @@ -249,12 +247,12 @@ private static BoundaryScanner getBoundaryScanner(Field field) {
private class MapperHighlightEntry {
public FragListBuilder fragListBuilder;
public FragmentsBuilder fragmentsBuilder;
public FieldQuery noFieldMatchFieldQuery;
public FieldQuery fieldMatchFieldQuery;
}

private class HighlighterEntry {
public org.apache.lucene.search.vectorhighlight.FastVectorHighlighter fvh;
public FieldQuery noFieldMatchFieldQuery;
public FieldQuery fieldMatchFieldQuery;
public Map<FieldMapper, MapperHighlightEntry> mappers = new HashMap<>();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
setup:
- do:
indices.create:
index: test
body:
mappings:
doc:
"properties":
"title":
"type": "text"
"term_vector": "with_positions_offsets"
"description":
"type": "text"
"term_vector": "with_positions_offsets"
- do:
index:
index: test
type: doc
id: 1
body:
"title" : "The quick brown fox is brown"
"description" : "The quick pink panther is pink"
- do:
indices.refresh: {}

---
"Highlight query":
- skip:
version: " - 5.5.99"
reason: bug fixed in 5.6
- do:
search:
body:
highlight:
type: fvh
fields:
description:
type: fvh
highlight_query:
prefix:
description: br
title:
type: fvh
highlight_query:
prefix:
title: br

- match: {hits.hits.0.highlight.title.0: "The quick <em>brown</em> fox is <em>brown</em>"}
- is_false: hits.hits.0.highlight.description

0 comments on commit 6c519cf

Please sign in to comment.