diff --git a/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java b/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java
index 0f11d7915da2b..5fccc6a84227f 100644
--- a/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java
+++ b/src/main/java/org/apache/lucene/search/vectorhighlight/CustomFieldQuery.java
@@ -104,6 +104,23 @@ void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQuerie
     }
 
     private void convertMultiPhraseQuery(int currentPos, int[] termsIdx, MultiPhraseQuery orig, List<Term[]> terms, int[] pos, IndexReader reader, Collection<Query> flatQueries) throws IOException {
+        if (currentPos == 0) {
+            // On the call with currentPos == 0, count the terms across all positions. With more
+            // than 16 terms, skip the per-path PhraseQuery expansion described below and flatten
+            // each term as its own TermQuery instead.
+            int numTerms = 0;
+            for (Term[] currentPosTerm : terms) {
+                numTerms += currentPosTerm.length;
+            }
+            if (numTerms > 16) {
+                for (Term[] currentPosTerm : terms) {
+                    for (Term term : currentPosTerm) {
+                        super.flatten(new TermQuery(term), reader, flatQueries);
+                    }
+                }
+                return;
+            }
+        }
         /*
          * we walk all possible ways and for each path down the MPQ we create a PhraseQuery this is what FieldQuery supports.
          * It seems expensive but most queries will pretty small.
diff --git a/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java
index 4c24c2240af06..d0dc875749968 100644
--- a/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java
+++ b/src/test/java/org/elasticsearch/test/integration/search/highlight/HighlighterSearchTests.java
@@ -57,7 +57,8 @@
 import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
 import static org.hamcrest.MatcherAssert.assertThat;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.startsWith;
 import static org.testng.Assert.fail;
 
 /**
@@ -149,6 +150,60 @@ public void testNgramHighlightingWithBrokenPositions() throws ElasticSearchExcep
         assertHighlight(search, 0, "name.autocomplete", 0, equalTo("ARCOTEL Hotels Deutschland"));
     }
 
+
+
+    @Test
+    public void testMultiPhraseCutoff() throws ElasticSearchException, IOException {
+        /*
+         * MultiPhraseQuery can literally kill an entire node if there are too many terms in the
+         * query. We cut off and extract terms if there are more than 16 terms in the query
+         */
+        prepareCreate("test")
+                .addMapping("test", jsonBuilder()
+                        .startObject()
+                            .startObject("test")
+                                .startObject("properties")
+                                    .startObject("body")
+                                        .field("type", "string")
+                                        .field("index_analyzer", "custom_analyzer")
+                                        .field("search_analyzer", "custom_analyzer")
+                                        .field("term_vector", "with_positions_offsets")
+                                    .endObject()
+                                .endObject()
+                            .endObject()
+                        .endObject())
+                .setSettings(ImmutableSettings.settingsBuilder()
+                        .put("index.number_of_shards", 1)
+                        .put("index.number_of_replicas", 0)
+                        .put("analysis.filter.wordDelimiter.type", "word_delimiter")
+                        // NOTE(review): this key has an extra ".type" segment compared with its sibling
+                        // settings; confirm whether "analysis.filter.wordDelimiter.split_on_numerics" was meant.
+                        .put("analysis.filter.wordDelimiter.type.split_on_numerics", false)
+                        .put("analysis.filter.wordDelimiter.generate_word_parts", true)
+                        .put("analysis.filter.wordDelimiter.generate_number_parts", true)
+                        .put("analysis.filter.wordDelimiter.catenate_words", true)
+                        .put("analysis.filter.wordDelimiter.catenate_numbers", true)
+                        .put("analysis.filter.wordDelimiter.catenate_all", false)
+                        .put("analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
+                        .putArray("analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter"))
+                .execute().actionGet();
+
+        ensureGreen();
+        client().prepareIndex("test", "test", "1")
+                .setSource(XContentFactory.jsonBuilder()
+                        .startObject()
+                            .field("body", "Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature")
+                        .endObject())
+                .execute().actionGet();
+        refresh();
+        SearchResponse search = client().prepareSearch().setQuery(matchQuery("body", "Test: http://www.facebook.com ").type(Type.PHRASE)).addHighlightedField("body").execute().actionGet();
+        // NOTE(review): neither expected string below contains highlight markup; confirm this
+        // matches what the highlighter actually returns for the "body" field.
+        assertHighlight(search, 0, "body", 0, startsWith("Test: http://www.facebook.com"));
+        search = client().prepareSearch().setQuery(matchQuery("body", "Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature").type(Type.PHRASE)).addHighlightedField("body").execute().actionGet();
+        assertHighlight(search, 0, "body", 0, equalTo("Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com"));
+    }
+
     @Test
     public void testNgramHighlightingPreLucene42() throws ElasticSearchException, IOException {
         try {