Bug fix for AnnotatedTextHighlighter #39525

Merged on Mar 6, 2019 (6 commits)
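This change fixes the thread-safety problem referenced by issue 39395: the highlighter previously kept the parsed annotations in an AnnotatedHighlighterAnalyzer instance field, so state could leak between hits and between concurrent search requests that share the same highlighter. The fix threads the per-hit FetchSubPhase.HitContext through getAnalyzer and getPassageFormatter and stores the parsed AnnotatedText[] in hitContext.cache(), so the state lives with the hit being highlighted.

A minimal sketch of that per-hit cache pattern follows. It is illustrative only: PerHitContext, PerHitStateSketch, KEY, and the markup-stripping regex are stand-ins invented for this description; the actual change stores an AnnotatedText[] under AnnotatedText.class.getName() in FetchSubPhase.HitContext#cache().

import java.util.HashMap;
import java.util.Map;

// Sketch of the per-hit cache pattern introduced by this PR. PerHitContext and
// the helper names below are hypothetical simplifications, not the Elasticsearch API.
class PerHitContext {
    private final Map<String, Object> cache = new HashMap<>();

    Map<String, Object> cache() {
        return cache;
    }
}

class PerHitStateSketch {
    private static final String KEY = "annotations";

    // Called while loading field values for one hit: parse the markup once and
    // stash the result in state owned by that hit, not by the highlighter.
    String[] loadFieldValues(String[] markedUpValues, PerHitContext hitContext) {
        String[] plainText = new String[markedUpValues.length];
        for (int i = 0; i < markedUpValues.length; i++) {
            // stand-in for AnnotatedText.parse(...): strip "[text](annotation)" markup
            plainText[i] = markedUpValues[i].replaceAll("\\[([^\\]]*)\\]\\([^)]*\\)", "$1");
        }
        hitContext.cache().put(KEY, markedUpValues);
        return plainText;
    }

    // Called later for the same hit: read the parsed values back from the same
    // per-hit context instead of from a field shared between requests.
    Object annotationsFor(PerHitContext hitContext) {
        return hitContext.cache().get(KEY);
    }
}

Because the cached annotations belong to a single hit, concurrent requests no longer observe each other's state, which is what the new YAML REST test below provokes by repeating the same highlight search.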
AnnotatedTextFieldMapper.java
@@ -57,6 +57,7 @@
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;
import org.elasticsearch.index.query.QueryShardContext;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;

import java.io.IOException;
import java.io.Reader;
@@ -317,46 +318,13 @@ public AnnotationToken getAnnotation(int index) {
// When asked to tokenize plain-text versions by the highlighter it tokenizes the
// original markup form in order to inject annotations.
public static final class AnnotatedHighlighterAnalyzer extends AnalyzerWrapper {
-private Analyzer delegate;
-private AnnotatedText[] annotations;
-public AnnotatedHighlighterAnalyzer(Analyzer delegate){
+private final Analyzer delegate;
+private final HitContext hitContext;
+public AnnotatedHighlighterAnalyzer(Analyzer delegate, HitContext hitContext){
super(delegate.getReuseStrategy());
this.delegate = delegate;
+this.hitContext = hitContext;
}

-public void init(String[] markedUpFieldValues) {
-this.annotations = new AnnotatedText[markedUpFieldValues.length];
-for (int i = 0; i < markedUpFieldValues.length; i++) {
-annotations[i] = AnnotatedText.parse(markedUpFieldValues[i]);
-}
-}
-
-public String [] getPlainTextValuesForHighlighter(){
-String [] result = new String[annotations.length];
-for (int i = 0; i < annotations.length; i++) {
-result[i] = annotations[i].textMinusMarkup;
-}
-return result;
-}
-
-public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
-List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
-int fieldValueOffset =0;
-for (AnnotatedText fieldValueAnnotations : this.annotations) {
-//This is called from a highlighter where all of the field values are concatenated
-// so each annotation offset will need to be adjusted so that it takes into account
-// the previous values AND the MULTIVAL delimiter
-for (AnnotationToken token : fieldValueAnnotations.annotations) {
-if(token.intersects(start - fieldValueOffset , end - fieldValueOffset)) {
-intersectingAnnotations.add(new AnnotationToken(token.offset + fieldValueOffset,
-token.endOffset + fieldValueOffset, token.value));
-}
-}
-//add 1 for the fieldvalue separator character
-fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
-}
-return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
-}

@Override
public Analyzer getWrappedAnalyzer(String fieldName) {
@@ -370,7 +338,8 @@ protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComp
return components;
}
AnnotationsInjector injector = new AnnotationsInjector(components.getTokenStream());
-return new AnnotatedHighlighterTokenStreamComponents(components.getTokenizer(), injector, this.annotations);
+AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
+return new AnnotatedHighlighterTokenStreamComponents(components.getTokenizer(), injector, annotations);
}
}
private static final class AnnotatedHighlighterTokenStreamComponents extends TokenStreamComponents{
AnnotatedPassageFormatter.java
@@ -23,7 +23,7 @@
import org.apache.lucene.search.uhighlight.Passage;
import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.Snippet;
-import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText.AnnotationToken;

import java.io.UnsupportedEncodingException;
@@ -42,11 +42,11 @@ public class AnnotatedPassageFormatter extends PassageFormatter {

public static final String SEARCH_HIT_TYPE = "_hit_term";
private final Encoder encoder;
-private AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer;
+AnnotatedText[] annotations;

-public AnnotatedPassageFormatter(AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer, Encoder encoder) {
-this.annotatedHighlighterAnalyzer = annotatedHighlighterAnalyzer;
+public AnnotatedPassageFormatter(AnnotatedText[] annotations, Encoder encoder) {
this.encoder = encoder;
+this.annotations = annotations;
}

static class MarkupPassage {
@@ -158,7 +158,7 @@ public Snippet[] format(Passage[] passages, String content) {
int pos;
int j = 0;
for (Passage passage : passages) {
-AnnotationToken [] annotations = annotatedHighlighterAnalyzer.getIntersectingAnnotations(passage.getStartOffset(),
+AnnotationToken [] annotations = getIntersectingAnnotations(passage.getStartOffset(),
passage.getEndOffset());
MarkupPassage mergedMarkup = mergeAnnotations(annotations, passage);

@@ -194,6 +194,27 @@ public Snippet[] format(Passage[] passages, String content) {
}
return snippets;
}

+public AnnotationToken[] getIntersectingAnnotations(int start, int end) {
+List<AnnotationToken> intersectingAnnotations = new ArrayList<>();
+int fieldValueOffset =0;
+for (AnnotatedText fieldValueAnnotations : this.annotations) {
+//This is called from a highlighter where all of the field values are concatenated
+// so each annotation offset will need to be adjusted so that it takes into account
+// the previous values AND the MULTIVAL delimiter
+for (int i = 0; i < fieldValueAnnotations.numAnnotations(); i++) {
+AnnotationToken token = fieldValueAnnotations.getAnnotation(i);
+if (token.intersects(start - fieldValueOffset, end - fieldValueOffset)) {
+intersectingAnnotations
+.add(new AnnotationToken(token.offset + fieldValueOffset, token.endOffset +
+fieldValueOffset, token.value));
+}
+}
+//add 1 for the fieldvalue separator character
+fieldValueOffset +=fieldValueAnnotations.textMinusMarkup.length() +1;
+}
+return intersectingAnnotations.toArray(new AnnotationToken[intersectingAnnotations.size()]);
+}

private void append(StringBuilder dest, String content, int start, int end) {
dest.append(encoder.encodeText(content.substring(start, end)));
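The getIntersectingAnnotations logic that moved into this formatter keeps the same offset arithmetic as before: the unified highlighter concatenates every value of a multi-valued field into one string, separated by a single character, so an annotation recorded against a later value has to be shifted by the lengths of all preceding values plus one separator each before it can be compared with a passage's offsets. A self-contained sketch of that adjustment, with made-up values rather than the Elasticsearch classes:

// Worked sketch of the offset adjustment performed by getIntersectingAnnotations.
// The field values, offsets and separator below are illustrative; the real code
// uses the highlighter's MULTIVAL_SEP_CHAR and AnnotationToken offsets.
public class OffsetAdjustmentSketch {
    public static void main(String[] args) {
        // textMinusMarkup for each stored value of a multi-valued field
        String[] plainValues = { "The quick fox", "brown dog" };
        char separator = '\u0001'; // stand-in for the real separator character

        // An annotation recorded against the second value: "brown" at [0, 5)
        int start = 0;
        int end = 5;

        // Offset of the second value inside the concatenated highlighter input:
        // the length of every preceding value plus one separator character each.
        int fieldValueOffset = plainValues[0].length() + 1; // 13 + 1 = 14

        String concatenated = plainValues[0] + separator + plainValues[1];
        // The adjusted range [14, 19) now points at "brown" in the concatenated text.
        System.out.println(concatenated.substring(start + fieldValueOffset, end + fieldValueOffset));
    }
}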
AnnotatedTextHighlighter.java
@@ -25,24 +25,22 @@
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
-import java.util.Arrays;
+import java.util.ArrayList;
import java.util.List;

public class AnnotatedTextHighlighter extends UnifiedHighlighter {

public static final String NAME = "annotated";

-AnnotatedHighlighterAnalyzer annotatedHighlighterAnalyzer = null;

@Override
-protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
-annotatedHighlighterAnalyzer = new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type));
-return annotatedHighlighterAnalyzer;
+protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
+return new AnnotatedHighlighterAnalyzer(super.getAnalyzer(docMapper, type, hitContext), hitContext);
}

// Convert the marked-up values held on-disk to plain-text versions for highlighting
@@ -51,14 +49,26 @@ protected List<Object> loadFieldValues(MappedFieldType fieldType, Field field, S
throws IOException {
List<Object> fieldValues = super.loadFieldValues(fieldType, field, context, hitContext);
String[] fieldValuesAsString = fieldValues.toArray(new String[fieldValues.size()]);
-annotatedHighlighterAnalyzer.init(fieldValuesAsString);
-return Arrays.asList((Object[]) annotatedHighlighterAnalyzer.getPlainTextValuesForHighlighter());

+AnnotatedText[] annotations = new AnnotatedText[fieldValuesAsString.length];
+for (int i = 0; i < fieldValuesAsString.length; i++) {
+annotations[i] = AnnotatedText.parse(fieldValuesAsString[i]);
+}
+// Store the annotations in the hitContext
+hitContext.cache().put(AnnotatedText.class.getName(), annotations);

+ArrayList<Object> result = new ArrayList<>(annotations.length);
+for (int i = 0; i < annotations.length; i++) {
+result.add(annotations[i].textMinusMarkup);
+}
+return result;
}

@Override
-protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
-return new AnnotatedPassageFormatter(annotatedHighlighterAnalyzer, encoder);

+protected PassageFormatter getPassageFormatter(HitContext hitContext,SearchContextHighlight.Field field, Encoder encoder) {
+// Retrieve the annotations from the hitContext
+AnnotatedText[] annotations = (AnnotatedText[]) hitContext.cache().get(AnnotatedText.class.getName());
+return new AnnotatedPassageFormatter(annotations, encoder);
}

}
AnnotatedTextHighlighterTests.java
@@ -40,36 +40,50 @@
import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator;
import org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter;
-import org.apache.lucene.search.uhighlight.PassageFormatter;
import org.apache.lucene.search.uhighlight.Snippet;
import org.apache.lucene.search.uhighlight.SplittingBreakIterator;
import org.apache.lucene.store.Directory;
import org.elasticsearch.common.Strings;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer;
+import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.AnnotatedPassageFormatter;
import org.elasticsearch.test.ESTestCase;

import java.net.URLEncoder;
import java.text.BreakIterator;
+import java.util.ArrayList;
import java.util.Locale;

import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR;
import static org.hamcrest.CoreMatchers.equalTo;

public class AnnotatedTextHighlighterTests extends ESTestCase {


private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
Query query, Locale locale, BreakIterator breakIterator,
int noMatchSize, String[] expectedPassages) throws Exception {


// Annotated fields wrap the usual analyzer with one that injects extra tokens
Analyzer wrapperAnalyzer = new AnnotationAnalyzerWrapper(new StandardAnalyzer());
-AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
-hiliteAnalyzer.init(markedUpInputs);
-PassageFormatter passageFormatter = new AnnotatedPassageFormatter(hiliteAnalyzer,new DefaultEncoder());
-String []plainTextForHighlighter = hiliteAnalyzer.getPlainTextValuesForHighlighter();
+HitContext mockHitContext = new HitContext();
+AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer, mockHitContext);

+AnnotatedText[] annotations = new AnnotatedText[markedUpInputs.length];
+for (int i = 0; i < markedUpInputs.length; i++) {
+annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
+}
+mockHitContext.cache().put(AnnotatedText.class.getName(), annotations);

+AnnotatedPassageFormatter passageFormatter = new AnnotatedPassageFormatter(annotations,new DefaultEncoder());

+ArrayList<Object> plainTextForHighlighter = new ArrayList<>(annotations.length);
+for (int i = 0; i < annotations.length; i++) {
+plainTextForHighlighter.add(annotations[i].textMinusMarkup);
+}

Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(wrapperAnalyzer);
@@ -94,7 +108,7 @@ private void assertHighlightOneDoc(String fieldName, String []markedUpInputs,
iw.close();
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
assertThat(topDocs.totalHits, equalTo(1L));
-String rawValue = Strings.arrayToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
+String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));

CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, hiliteAnalyzer, null,
passageFormatter, locale,
Annotated text highlighting YAML REST test
@@ -42,3 +42,80 @@
body: { "query" : {"term" : { "text" : "quick" } }, "highlight" : { "type" : "annotated", "require_field_match": false, "fields" : { "text" : {} } } }

- match: {hits.hits.0.highlight.text.0: "The [quick](_hit_term=quick) brown fox is brown."}

---
"issue 39395 thread safety issue -requires multiple calls to reveal":
- skip:
version: " - 6.4.99"
reason: Annotated text type introduced in 6.5.0

- do:
indices.create:
index: annotated
body:
settings:
number_of_shards: "5"
number_of_replicas: "0"
mappings:
doc:
properties:
my_field:
type: annotated_text

- do:
index:
index: annotated
type: doc
id: 1
body:
"my_field" : "[A](~MARK0&~MARK0) [B](~MARK1)"
- do:
index:
index: annotated
type: doc
id: 2
body:
"my_field" : "[A](~MARK0) [C](~MARK2)"
refresh: true
- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}

- do:
search:
request_cache: false
body: { "query" : {"match_phrase" : { "my_field" : {"query": "~MARK0", "analyzer": "whitespace"} } }, "highlight" : { "type" : "annotated", "fields" : { "my_field" : {} } } }
- match: {_shards.failed: 0}
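
The repeated, identical searches above are deliberate: the bug is a race, so a single request usually succeeds, and only repeated requests against several shards reliably reveal it. With the pre-fix code the parsed annotations lived in a field of a highlighter object that can be reused across requests, so one request could read values written by another. The sketch below illustrates that failure mode with a hypothetical class; it is not the Elasticsearch implementation.

// Hypothetical illustration of per-request state kept on a shared object,
// the shape of bug this PR removes; not the Elasticsearch code.
public class SharedStateRaceSketch {

    static class SharedHighlighter {
        private String[] perRequestValues; // overwritten by every request

        String highlight(String[] values) {
            perRequestValues = values;                  // request A writes its state
            Thread.yield();                             // another request may write here
            return String.join(",", perRequestValues);  // A may now read B's values
        }
    }

    public static void main(String[] args) throws InterruptedException {
        SharedHighlighter shared = new SharedHighlighter();
        Runnable requestA = () -> {
            for (int i = 0; i < 100_000; i++) {
                if (!"A".equals(shared.highlight(new String[] { "A" }))) {
                    System.out.println("request A observed another request's values");
                }
            }
        };
        Runnable requestB = () -> {
            for (int i = 0; i < 100_000; i++) {
                shared.highlight(new String[] { "B" });
            }
        };
        Thread a = new Thread(requestA);
        Thread b = new Thread(requestB);
        a.start();
        b.start();
        a.join();
        b.join();
    }
}

Repeating the search gives each shard's fetch phase several chances to hit that interleaving, which is why the test asserts _shards.failed: 0 after every call.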

FetchSubPhase.java
@@ -74,7 +74,6 @@ public Map<String, Object> cache() {
}
return cache;
}

}

/**
UnifiedHighlighter.java
@@ -39,6 +39,7 @@
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
import org.elasticsearch.search.fetch.FetchSubPhase;
+import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.index.IndexSettings;

@@ -70,12 +71,13 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
int numberOfFragments;
try {

-final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType);
+final Analyzer analyzer = getAnalyzer(context.mapperService().documentMapper(hitContext.hit().getType()), fieldType,
+hitContext);
List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext);
if (fieldValues.size() == 0) {
return null;
}
-final PassageFormatter passageFormatter = getPassageFormatter(field, encoder);
+final PassageFormatter passageFormatter = getPassageFormatter(hitContext, field, encoder);
final IndexSearcher searcher = new IndexSearcher(hitContext.reader());
final CustomUnifiedHighlighter highlighter;
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
@@ -155,14 +157,14 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
return null;
}

-protected PassageFormatter getPassageFormatter(SearchContextHighlight.Field field, Encoder encoder) {
+protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchContextHighlight.Field field, Encoder encoder) {
CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0],
field.fieldOptions().postTags()[0], encoder);
return passageFormatter;
}


-protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type) {
+protected Analyzer getAnalyzer(DocumentMapper docMapper, MappedFieldType type, HitContext hitContext) {
return HighlightUtils.getAnalyzer(docMapper, type);
}
