diff --git a/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java b/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java index 74402e1b5bfb7..10c686f50d74b 100644 --- a/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java +++ b/src/main/java/org/elasticsearch/search/highlight/FastVectorHighlighter.java @@ -18,11 +18,20 @@ */ package org.elasticsearch.search.highlight; -import com.google.common.collect.Maps; +import java.util.Map; + import org.apache.lucene.search.highlight.DefaultEncoder; import org.apache.lucene.search.highlight.Encoder; import org.apache.lucene.search.highlight.SimpleHTMLEncoder; -import org.apache.lucene.search.vectorhighlight.*; +import org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder; +import org.apache.lucene.search.vectorhighlight.BoundaryScanner; +import org.apache.lucene.search.vectorhighlight.CustomFieldQuery; +import org.apache.lucene.search.vectorhighlight.FieldQuery; +import org.apache.lucene.search.vectorhighlight.FragListBuilder; +import org.apache.lucene.search.vectorhighlight.FragmentsBuilder; +import org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder; +import org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder; +import org.apache.lucene.search.vectorhighlight.SingleFragListBuilder; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.vectorhighlight.SimpleBoundaryScanner2; @@ -31,11 +40,12 @@ import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.search.fetch.FetchPhaseExecutionException; import org.elasticsearch.search.fetch.FetchSubPhase; +import org.elasticsearch.search.highlight.vectorhighlight.SimpleFragmentsBuilder; import org.elasticsearch.search.highlight.vectorhighlight.SourceScoreOrderFragmentsBuilder; import org.elasticsearch.search.highlight.vectorhighlight.SourceSimpleFragmentsBuilder; import org.elasticsearch.search.internal.SearchContext; -import java.util.Map; +import com.google.common.collect.Maps; /** * @@ -89,7 +99,7 @@ public HighlightField highlight(HighlighterContext highlighterContext) { fragListBuilder = new SingleFragListBuilder(); if (mapper.fieldType().stored()) { - fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags(), boundaryScanner); + fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.preTags(), field.postTags(), boundaryScanner); } else { fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner); } @@ -103,7 +113,7 @@ public HighlightField highlight(HighlighterContext highlighterContext) { } } else { if (mapper.fieldType().stored()) { - fragmentsBuilder = new SimpleFragmentsBuilder(field.preTags(), field.postTags(), boundaryScanner); + fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.preTags(), field.postTags(), boundaryScanner); } else { fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.preTags(), field.postTags(), boundaryScanner); } diff --git a/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SimpleFragmentsBuilder.java b/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SimpleFragmentsBuilder.java new file mode 100644 index 0000000000000..d747eafd6819b --- /dev/null +++ b/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SimpleFragmentsBuilder.java @@ -0,0 +1,45 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.highlight.vectorhighlight; + +import org.apache.lucene.document.Field; +import org.apache.lucene.search.highlight.Encoder; +import org.apache.lucene.search.vectorhighlight.BoundaryScanner; +import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo; +import org.elasticsearch.index.mapper.FieldMapper; + +/** + * Direct Subclass of Lucene's org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder + * that corrects offsets for broken analysis chains. + */ +public class SimpleFragmentsBuilder extends org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder { + protected final FieldMapper mapper; + + public SimpleFragmentsBuilder(FieldMapper mapper, + String[] preTags, String[] postTags, BoundaryScanner boundaryScanner) { + super(preTags, postTags, boundaryScanner); + this.mapper = mapper; + } + + @Override + protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo, + String[] preTags, String[] postTags, Encoder encoder ){ + return super.makeFragment(buffer, index, values, FragmentBuilderHelper.fixWeightedFragInfo(mapper, values, fragInfo), preTags, postTags, encoder); + } +} diff --git a/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java b/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java index c2f1237b7a4cf..b9d362c87aa0d 100644 --- a/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java +++ b/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java @@ -23,18 +23,12 @@ import org.apache.lucene.document.TextField; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.highlight.Encoder; import org.apache.lucene.search.vectorhighlight.BoundaryScanner; -import org.apache.lucene.search.vectorhighlight.SimpleFragmentsBuilder; -import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo; -import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.search.lookup.SearchLookup; import java.io.IOException; -import java.util.Collections; -import java.util.Comparator; import java.util.List; /** @@ -42,14 +36,11 @@ */ public class SourceSimpleFragmentsBuilder extends SimpleFragmentsBuilder { - private final FieldMapper mapper; - private final SearchContext searchContext; public SourceSimpleFragmentsBuilder(FieldMapper mapper, SearchContext searchContext, String[] preTags, String[] postTags, BoundaryScanner boundaryScanner) { - super(preTags, postTags, boundaryScanner); - this.mapper = mapper; + super(mapper, preTags, postTags, boundaryScanner); this.searchContext = searchContext; } @@ -73,9 +64,4 @@ protected Field[] getFields(IndexReader reader, int docId, String fieldName) thr return fields; } - protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo, - String[] preTags, String[] postTags, Encoder encoder ){ - return super.makeFragment(buffer, index, values, FragmentBuilderHelper.fixWeightedFragInfo(mapper, values, fragInfo), preTags, postTags, encoder); - } - } diff --git a/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java index bf5ad7aeacdd4..da6fa3b3231e1 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java @@ -127,71 +127,94 @@ public void testNgramHighlightingWithBrokenPositions() throws ElasticSearchExcep @Test public void testNgramHighlightingPreLucene42() throws ElasticSearchException, IOException { - client().admin().indices().prepareCreate("test") - .addMapping("test", jsonBuilder() - .startObject() - .startObject("test") - .startObject("properties") - .startObject("name") - .field("type", "string") - .field("index_analyzer", "name_index_analyzer") - .field("search_analyzer", "name_search_analyzer") - .field("term_vector", "with_positions_offsets") - .endObject() - .startObject("name2") - .field("type", "string") - .field("index_analyzer", "name2_index_analyzer") - .field("search_analyzer", "name_search_analyzer") - .field("term_vector", "with_positions_offsets") + boolean[] doStore = {true, false}; + for (boolean store : doStore) { + wipeIndex("test"); + client().admin().indices().prepareCreate("test") + .addMapping("test", jsonBuilder() + .startObject() + .startObject("test") + .startObject("properties") + .startObject("name") + .field("type", "string") + .field("index_analyzer", "name_index_analyzer") + .field("search_analyzer", "name_search_analyzer") + .field("term_vector", "with_positions_offsets") + .field("store", store ? "yes" : "no") + .endObject() + .startObject("name2") + .field("type", "string") + .field("index_analyzer", "name2_index_analyzer") + .field("search_analyzer", "name_search_analyzer") + .field("term_vector", "with_positions_offsets") + .field("store", store ? "yes" : "no") + .endObject() .endObject() .endObject() - .endObject() - .endObject()) - .setSettings(ImmutableSettings.settingsBuilder() - .put("index.number_of_shards", 2) - .put("analysis.filter.my_ngram.max_gram", 20) - .put("analysis.filter.my_ngram.version", "4.1") - .put("analysis.filter.my_ngram.min_gram", 1) - .put("analysis.filter.my_ngram.type", "ngram") - .put("analysis.tokenizer.my_ngramt.max_gram", 20) - .put("analysis.tokenizer.my_ngramt.version", "4.1") - .put("analysis.tokenizer.my_ngramt.min_gram", 1) - .put("analysis.tokenizer.my_ngramt.type", "ngram") - .put("analysis.analyzer.name_index_analyzer.tokenizer", "my_ngramt") - .put("analysis.analyzer.name2_index_analyzer.tokenizer", "whitespace") - .put("analysis.analyzer.name2_index_analyzer.filter", "my_ngram") - .put("analysis.analyzer.name_search_analyzer.tokenizer", "whitespace")) - .execute().actionGet(); - client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setWaitForYellowStatus().execute().actionGet(); - client().prepareIndex("test", "test", "1") + .endObject()) + .setSettings(ImmutableSettings.settingsBuilder() + .put("index.number_of_shards", 2) + .put("analysis.filter.my_ngram.max_gram", 20) + .put("analysis.filter.my_ngram.version", "4.1") + .put("analysis.filter.my_ngram.min_gram", 1) + .put("analysis.filter.my_ngram.type", "ngram") + .put("analysis.tokenizer.my_ngramt.max_gram", 20) + .put("analysis.tokenizer.my_ngramt.version", "4.1") + .put("analysis.tokenizer.my_ngramt.min_gram", 1) + .put("analysis.tokenizer.my_ngramt.type", "ngram") + .put("analysis.analyzer.name_index_analyzer.tokenizer", "my_ngramt") + .put("analysis.analyzer.name2_index_analyzer.tokenizer", "whitespace") + .putArray("analysis.analyzer.name2_index_analyzer.filter", "lowercase", "my_ngram") + .put("analysis.analyzer.name_search_analyzer.tokenizer", "whitespace") + .put("analysis.analyzer.name_search_analyzer.filter", "lowercase")) + .execute().actionGet(); + ensureYellow(); + client().prepareIndex("test", "test", "1") + .setSource(XContentFactory.jsonBuilder() + .startObject() + .field("name", "logicacmg ehemals avinci - the know how company") + .field("name2", "logicacmg ehemals avinci - the know how company") + .endObject()) + .execute().actionGet(); + + client().prepareIndex("test", "test", "2") .setSource(XContentFactory.jsonBuilder() .startObject() - .field("name", "logicacmg ehemals avinci - the know how company") - .field("name2", "logicacmg ehemals avinci - the know how company") + .field("name", "avinci, unilog avinci, logicacmg, logica" ) + .field("name2", "avinci, unilog avinci, logicacmg, logica") .endObject()) - .setRefresh(true).execute().actionGet(); - SearchResponse search = client().prepareSearch().setQuery(matchQuery("name", "logica m")).addHighlightedField("name").execute().actionGet(); - assertHighlight(search, 0, "name", 0, equalTo("logicacmg ehemals avinci - the know how company")); - - search = client().prepareSearch().setQuery(matchQuery("name", "logica ma")).addHighlightedField("name").execute() - .actionGet(); - assertHighlight(search, 0, "name", 0, equalTo("logicacmg ehemals avinci - the know how company")); - - search = client().prepareSearch().setQuery(matchQuery("name", "logica")).addHighlightedField("name").execute().actionGet(); - assertHighlight(search, 0, "name", 0, equalTo("logicacmg ehemals avinci - the know how company")); - - search = client().prepareSearch().setQuery(matchQuery("name2", "logica m")).addHighlightedField("name2").execute().actionGet(); - assertHighlight(search, 0, "name2", 0, equalTo("logicacmg ehemals avinci - the know how company")); - - search = client().prepareSearch().setQuery(matchQuery("name2", "logica ma")).addHighlightedField("name2").execute() - .actionGet(); - assertHighlight(search, 0, "name2", 0, equalTo("logicacmg ehemals avinci - the know how company")); + .execute().actionGet(); + refresh(); + + SearchResponse search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("name", "logica m"))).addHighlightedField("name").execute().actionGet(); + assertHighlight(search, 0, "name", 0, equalTo("logicacmg ehemals avinci - the know how company")); + assertHighlight(search, 1, "name", 0, equalTo("avinci, unilog avinci, logicacmg, logica")); + + search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("name", "logica ma"))).addHighlightedField("name").execute() + .actionGet(); + assertHighlight(search, 0, "name", 0, equalTo("logicacmg ehemals avinci - the know how company")); + assertHighlight(search, 1, "name", 0, equalTo("avinci, unilog avinci, logicacmg, logica")); + + search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("name", "logica"))).addHighlightedField("name").execute().actionGet(); + assertHighlight(search, 0, "name", 0, equalTo("logicacmg ehemals avinci - the know how company")); + + search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("name2", "logica m"))).addHighlightedField("name2").execute().actionGet(); + assertHighlight(search, 0, "name2", 0, equalTo("logicacmg ehemals avinci - the know how company")); + assertHighlight(search, 1, "name2", 0, equalTo("avinci, unilog avinci, logicacmg, logica")); + + search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("name2", "logica ma"))).addHighlightedField("name2").execute() + .actionGet(); + assertHighlight(search, 0, "name2", 0, equalTo("logicacmg ehemals avinci - the know how company")); + assertHighlight(search, 1, "name2", 0, equalTo("avinci, unilog avinci, logicacmg, logica")); + + + search = client().prepareSearch().setQuery(constantScoreQuery(matchQuery("name2", "logica"))).addHighlightedField("name2").execute().actionGet(); + assertHighlight(search, 0, "name2", 0, equalTo("logicacmg ehemals avinci - the know how company")); + assertHighlight(search, 1, "name2", 0, equalTo("avinci, unilog avinci, logicacmg, logica")); + } - search = client().prepareSearch().setQuery(matchQuery("name2", "logica")).addHighlightedField("name2").execute().actionGet(); - assertHighlight(search, 0, "name2", 0, equalTo("logicacmg ehemals avinci - the know how company")); } - @Test public void testNgramHighlighting() throws ElasticSearchException, IOException { client().admin().indices().prepareCreate("test")