Skip to content

Commit

Permalink
Fallback to extract terms if MultiPhraseQuery is large
Browse files Browse the repository at this point in the history
Currently, if a MultiPhraseQuery (MPQ) is very large, highlighting can take down a node
or cause high CPU / RAM consumption. If the query grows to more than 16 terms,
we just extract the terms and do term-by-term highlighting.

Closes  elastic#3142 elastic#3128
  • Loading branch information
s1monw committed Jun 6, 2013
1 parent f995c9c commit 1c513bc
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,21 @@ void flatten(Query sourceQuery, IndexReader reader, Collection<Query> flatQuerie
}

private void convertMultiPhraseQuery(int currentPos, int[] termsIdx, MultiPhraseQuery orig, List<Term[]> terms, int[] pos, IndexReader reader, Collection<Query> flatQueries) throws IOException {
if (currentPos == 0) {
// if we have more than 16 terms in total, skip the phrase expansion and flatten each term as its own TermQuery
int numTerms = 0;
for (Term[] currentPosTerm : terms) {
numTerms += currentPosTerm.length;
}
if (numTerms > 16) {
for (Term[] currentPosTerm : terms) {
for (Term term : currentPosTerm) {
super.flatten(new TermQuery(term), reader, flatQueries);
}
}
return;
}
}
/*
* We walk all possible paths down the MPQ and create a PhraseQuery for each path, since that is what FieldQuery supports.
* It seems expensive, but most queries will be pretty small.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.startsWith;
import static org.testng.Assert.fail;

/**
Expand Down Expand Up @@ -125,6 +126,56 @@ public void testNgramHighlightingWithBrokenPositions() throws ElasticSearchExcep
assertHighlight(search, 0, "name.autocomplete", 0, equalTo("ARCO<em>TEL</em> Ho<em>tel</em>s <em>Deut</em>schland"));
}



/**
 * Checks how phrase queries are highlighted on a field analyzed with a whitespace tokenizer
 * plus lowercase and word_delimiter filters (term vectors with positions and offsets enabled).
 * <p>
 * A short phrase query is expected to be highlighted as one contiguous phrase. A phrase query
 * built from the whole document text is expected to be highlighted term by term, which is the
 * fallback applied once the query holds more than 16 terms.
 *
 * @throws ElasticSearchException if an index, indexing or search request fails
 * @throws IOException if building the JSON mapping or document source fails
 */
@Test
public void testMultiPhraseCutoff() throws ElasticSearchException, IOException {
    /*
     * MultiPhraseQuery can literally kill an entire node if there are too many terms in the
     * query. We cut off and extract terms if there are more than 16 terms in the query
     */
    // The same text is used as the indexed document body and as the large phrase query.
    final String text = "Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature";

    prepareCreate("test")
            .addMapping("test", jsonBuilder()
                    .startObject()
                        .startObject("test")
                            .startObject("properties")
                                .startObject("body")
                                    .field("type", "string")
                                    .field("index_analyzer", "custom_analyzer")
                                    .field("search_analyzer", "custom_analyzer")
                                    .field("term_vector", "with_positions_offsets")
                                .endObject()
                            .endObject()
                        .endObject()
                    .endObject())
            .setSettings(ImmutableSettings.settingsBuilder()
                    .put("index.number_of_shards", 1)
                    .put("index.number_of_replicas", 0)
                    .put("analysis.filter.wordDelimiter.type", "word_delimiter")
                    // Filter options live directly under the filter name; the key previously
                    // carried a stray ".type" segment, unlike the sibling options below.
                    .put("analysis.filter.wordDelimiter.split_on_numerics", false)
                    .put("analysis.filter.wordDelimiter.generate_word_parts", true)
                    .put("analysis.filter.wordDelimiter.generate_number_parts", true)
                    .put("analysis.filter.wordDelimiter.catenate_words", true)
                    .put("analysis.filter.wordDelimiter.catenate_numbers", true)
                    .put("analysis.filter.wordDelimiter.catenate_all", false)
                    .put("analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
                    .putArray("analysis.analyzer.custom_analyzer.filter", "lowercase", "wordDelimiter"))
            .execute().actionGet();

    ensureGreen();
    client().prepareIndex("test", "test", "1")
            .setSource(XContentFactory.jsonBuilder()
                    .startObject()
                        .field("body", text)
                    .endObject())
            .execute().actionGet();
    refresh();

    // Small phrase query: the matched phrase is wrapped in a single highlight tag.
    SearchResponse search = client().prepareSearch()
            .setQuery(matchQuery("body", "Test: http://www.facebook.com ").type(Type.PHRASE))
            .addHighlightedField("body")
            .execute().actionGet();
    assertHighlight(search, 0, "body", 0, startsWith("<em>Test: http://www.facebook.com</em>"));

    // Large phrase query: every matching term is expected to be highlighted on its own.
    search = client().prepareSearch()
            .setQuery(matchQuery("body", text).type(Type.PHRASE))
            .addHighlightedField("body")
            .execute().actionGet();
    assertHighlight(search, 0, "body", 0, equalTo("<em>Test</em>: <em>http</em>://<em>www</em>.<em>facebook</em>.<em>com</em> <em>http</em>://<em>elasticsearch</em>.<em>org</em> <em>http</em>://<em>xing</em>.<em>com</em> <em>http</em>://<em>cnn</em>.<em>com</em> <em>http</em>://<em>quora</em>.com"));
}

@Test
public void testNgramHighlightingPreLucene42() throws ElasticSearchException, IOException {
boolean[] doStore = {true, false};
Expand Down

0 comments on commit 1c513bc

Please sign in to comment.