diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index 79fefbc64d407..2aadfd2218590 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -27,17 +27,17 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; -import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.NormsFieldExistsQuery; -import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanQuery; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -603,62 +603,26 @@ public Query existsQuery(QueryShardContext context) { } @Override - public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { - PhraseQuery.Builder builder = new PhraseQuery.Builder(); - builder.setSlop(slop); - - TermToBytesRefAttribute termAtt = 
stream.getAttribute(TermToBytesRefAttribute.class); - PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); - int position = -1; - - stream.reset(); - while (stream.incrementToken()) { - if (enablePosIncrements) { - position += posIncrAtt.getPositionIncrement(); - } - else { - position += 1; - } - builder.add(new Term(field, termAtt.getBytesRef()), position); - } - - return builder.build(); + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { + SpanMultiTermQueryWrapper spanMulti = + new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(name(), indexedValueForSearch(value)))); + spanMulti.setRewriteMethod(method); + return spanMulti; } @Override - public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { - - MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder(); - mpqb.setSlop(slop); - - TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); - - PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); - int position = -1; - - List multiTerms = new ArrayList<>(); - stream.reset(); - while (stream.incrementToken()) { - int positionIncrement = posIncrAtt.getPositionIncrement(); + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); + } - if (positionIncrement > 0 && multiTerms.size() > 0) { - if (enablePositionIncrements) { - mpqb.add(multiTerms.toArray(new Term[0]), position); - } else { - mpqb.add(multiTerms.toArray(new Term[0])); - } - multiTerms.clear(); - } - position += positionIncrement; - multiTerms.add(new Term(field, termAtt.getBytesRef())); - } + @Override + public Query multiPhraseQuery(TokenStream stream, int slop, boolean 
enablePositionIncrements) throws IOException { + return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); + } - if (enablePositionIncrements) { - mpqb.add(multiTerms.toArray(new Term[0]), position); - } else { - mpqb.add(multiTerms.toArray(new Term[0])); - } - return mpqb.build(); + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions); } } diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java b/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java index b8e1039b2df1d..57f60add714a1 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java @@ -39,16 +39,21 @@ import java.util.Iterator; import java.util.List; import java.util.ListIterator; +import java.util.Objects; public class MultiPhrasePrefixQuery extends Query { - private String field; + private final String field; private ArrayList termArrays = new ArrayList<>(); private ArrayList positions = new ArrayList<>(); private int maxExpansions = Integer.MAX_VALUE; private int slop = 0; + public MultiPhrasePrefixQuery(String field) { + this.field = Objects.requireNonNull(field); + } + /** * Sets the phrase slop for this query. 
* @@ -102,9 +107,6 @@ public void add(Term[] terms) { * @see org.apache.lucene.search.PhraseQuery.Builder#add(Term, int) */ public void add(Term[] terms, int position) { - if (termArrays.size() == 0) - field = terms[0].field(); - for (int i = 0; i < terms.length; i++) { if (terms[i].field() != field) { throw new IllegalArgumentException( @@ -212,7 +214,7 @@ private void getPrefixTerms(ObjectHashSet terms, final Term prefix, final @Override public final String toString(String f) { StringBuilder buffer = new StringBuilder(); - if (field == null || !field.equals(f)) { + if (field.equals(f) == false) { buffer.append(field); buffer.append(":"); } diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/SpanBooleanQueryRewriteWithMaxClause.java b/server/src/main/java/org/elasticsearch/common/lucene/search/SpanBooleanQueryRewriteWithMaxClause.java new file mode 100644 index 0000000000000..e78770ed2a85a --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/SpanBooleanQueryRewriteWithMaxClause.java @@ -0,0 +1,119 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.common.lucene.search; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.queries.SpanMatchNoDocsQuery; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + +/** + * A span rewrite method that extracts the first maxExpansions terms + * that match the {@link MultiTermQuery} in the terms dictionary. + * The rewrite throws an error if more than maxExpansions terms are found and hardLimit + * is set. 
+ */ +public class SpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod { + private final int maxExpansions; + private final boolean hardLimit; + + public SpanBooleanQueryRewriteWithMaxClause() { + this(BooleanQuery.getMaxClauseCount(), true); + } + + public SpanBooleanQueryRewriteWithMaxClause(int maxExpansions, boolean hardLimit) { + this.maxExpansions = maxExpansions; + this.hardLimit = hardLimit; + } + + public int getMaxExpansions() { + return maxExpansions; + } + + public boolean isHardLimit() { + return hardLimit; + } + + @Override + public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException { + final MultiTermQuery.RewriteMethod delegate = new MultiTermQuery.RewriteMethod() { + @Override + public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { + Collection queries = collectTerms(reader, query); + if (queries.size() == 0) { + return new SpanMatchNoDocsQuery(query.getField(), "no expansion found for " + query.toString()); + } else if (queries.size() == 1) { + return queries.iterator().next(); + } else { + return new SpanOrQuery(queries.toArray(new SpanQuery[0])); + } + } + + private Collection collectTerms(IndexReader reader, MultiTermQuery query) throws IOException { + Set queries = new HashSet<>(); + IndexReaderContext topReaderContext = reader.getContext(); + for (LeafReaderContext context : topReaderContext.leaves()) { + final Terms terms = context.reader().terms(query.getField()); + if (terms == null) { + // field does not exist + continue; + } + + final TermsEnum termsEnum = getTermsEnum(query, terms, null); + assert termsEnum != null; + + if (termsEnum == TermsEnum.EMPTY) + continue; + + BytesRef bytes; + while ((bytes = termsEnum.next()) != null) { + if (queries.size() >= maxExpansions) { + if (hardLimit) { + throw new RuntimeException("[" + query.toString() + " ] " + + "exceeds maxClauseCount [ Boolean maxClauseCount is set to " + 
BooleanQuery.getMaxClauseCount() + "]"); + } else { + return queries; + } + } + queries.add(new SpanTermQuery(new Term(query.getField(), bytes))); + } + } + return queries; + } + }; + return (SpanQuery) delegate.rewrite(reader, query); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 741b2300a4678..f785e01125f69 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -35,6 +35,8 @@ import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.intervals.IntervalsSource; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.Nullable; @@ -365,16 +367,26 @@ public Query regexpQuery(String value, int flags, int maxDeterminizedStates, @Nu public abstract Query existsQuery(QueryShardContext context); - public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } - public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } + public Query 
phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + throw new IllegalArgumentException("Can only use phrase prefix queries on text fields - not on [" + name + + "] which is of type [" + typeName() + "]"); + } + + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { + throw new IllegalArgumentException("Can only use span prefix queries on text fields - not on [" + name + + "] which is of type [" + typeName() + "]"); + } + /** * Create an {@link IntervalsSource} to be used for proximity queries */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 1b25c7b9866f7..e5fc470e130bc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -40,14 +40,23 @@ import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.NormsFieldExistsQuery; import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.intervals.IntervalsSource; +import org.apache.lucene.search.spans.FieldMaskingSpanQuery; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; import org.elasticsearch.Version; import org.elasticsearch.common.collect.Iterators; +import 
org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.support.XContentMapValues; @@ -60,6 +69,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -598,6 +608,23 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer return tq; } + @Override + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { + failIfNotIndexed(); + if (prefixFieldType != null + && value.length() >= prefixFieldType.minChars + && value.length() <= prefixFieldType.maxChars + && prefixFieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) { + + return new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixFieldType.name(), indexedValueForSearch(value))), name()); + } else { + SpanMultiTermQueryWrapper spanMulti = + new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(name(), indexedValueForSearch(value)))); + spanMulti.setRewriteMethod(method); + return spanMulti; + } + } + @Override public Query existsQuery(QueryShardContext context) { if (omitNorms()) { @@ -617,9 +644,9 @@ public IntervalsSource intervals(String text, int maxGaps, boolean ordered, Name } @Override - public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { - - if (indexPhrases && slop == 0 && hasGaps(cache(stream)) == false) { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { + String field = name(); + if (indexPhrases && slop == 0 && hasGaps(stream) == false) { stream = new FixedShingleFilter(stream, 2); field = field + FAST_PHRASE_SUFFIX; } @@ -645,54 +672,85 @@ public Query phraseQuery(String field, 
TokenStream stream, int slop, boolean ena } @Override - public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { - - if (indexPhrases && slop == 0 && hasGaps(cache(stream)) == false) { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + String field = name(); + if (indexPhrases && slop == 0 && hasGaps(stream) == false) { stream = new FixedShingleFilter(stream, 2); field = field + FAST_PHRASE_SUFFIX; } + return createPhraseQuery(stream, field, slop, enablePositionIncrements); + } - MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder(); - mpqb.setSlop(slop); - - TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + return analyzePhrasePrefix(stream, slop, maxExpansions); + } - PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); - int position = -1; + private Query analyzePhrasePrefix(TokenStream stream, int slop, int maxExpansions) throws IOException { + final MultiPhrasePrefixQuery query = createPhrasePrefixQuery(stream, name(), slop, maxExpansions); - List multiTerms = new ArrayList<>(); - stream.reset(); - while (stream.incrementToken()) { - int positionIncrement = posIncrAtt.getPositionIncrement(); + if (slop > 0 + || prefixFieldType == null + || prefixFieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { + return query; + } - if (positionIncrement > 0 && multiTerms.size() > 0) { - if (enablePositionIncrements) { - mpqb.add(multiTerms.toArray(new Term[0]), position); - } else { - mpqb.add(multiTerms.toArray(new Term[0])); - } - multiTerms.clear(); + int lastPos = query.getTerms().length - 1; + final Term[][] terms = query.getTerms(); + final int[] positions = query.getPositions(); + for (Term term : 
terms[lastPos]) { + String value = term.text(); + if (value.length() < prefixFieldType.minChars || value.length() > prefixFieldType.maxChars) { + return query; } - position += positionIncrement; - multiTerms.add(new Term(field, termAtt.getBytesRef())); } - if (enablePositionIncrements) { - mpqb.add(multiTerms.toArray(new Term[0]), position); - } else { - mpqb.add(multiTerms.toArray(new Term[0])); + if (terms.length == 1) { + Term[] newTerms = Arrays.stream(terms[0]) + .map(term -> new Term(prefixFieldType.name(), term.bytes())) + .toArray(Term[]::new); + return new SynonymQuery(newTerms); } - return mpqb.build(); - } - private static CachingTokenFilter cache(TokenStream in) { - if (in instanceof CachingTokenFilter) { - return (CachingTokenFilter) in; + SpanNearQuery.Builder spanQuery = new SpanNearQuery.Builder(name(), true); + spanQuery.setSlop(slop); + int previousPos = -1; + for (int i = 0; i < terms.length; i++) { + Term[] posTerms = terms[i]; + int posInc = positions[i] - previousPos; + previousPos = positions[i]; + if (posInc > 1) { + spanQuery.addGap(posInc - 1); + } + if (i == lastPos) { + if (posTerms.length == 1) { + FieldMaskingSpanQuery fieldMask = + new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixFieldType.name(), posTerms[0].bytes())), name()); + spanQuery.addClause(fieldMask); + } else { + SpanQuery[] queries = Arrays.stream(posTerms) + .map(term -> new FieldMaskingSpanQuery( + new SpanTermQuery(new Term(prefixFieldType.name(), term.bytes())), name()) + ) + .toArray(SpanQuery[]::new); + spanQuery.addClause(new SpanOrQuery(queries)); + } + } else { + if (posTerms.length == 1) { + spanQuery.addClause(new SpanTermQuery(posTerms[0])); + } else { + SpanTermQuery[] queries = Arrays.stream(posTerms) + .map(SpanTermQuery::new) + .toArray(SpanTermQuery[]::new); + spanQuery.addClause(new SpanOrQuery(queries)); + } + } } - return new CachingTokenFilter(in); + return spanQuery.build(); } - private static boolean hasGaps(CachingTokenFilter stream) 
throws IOException { + private static boolean hasGaps(TokenStream stream) throws IOException { + assert stream instanceof CachingTokenFilter; PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class); stream.reset(); while (stream.incrementToken()) { @@ -870,4 +928,65 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, builder.field("index_phrases", fieldType().indexPhrases); } } + + public static Query createPhraseQuery(TokenStream stream, String field, int slop, boolean enablePositionIncrements) throws IOException { + MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder(); + mpqb.setSlop(slop); + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + int position = -1; + + List multiTerms = new ArrayList<>(); + stream.reset(); + while (stream.incrementToken()) { + int positionIncrement = posIncrAtt.getPositionIncrement(); + + if (positionIncrement > 0 && multiTerms.size() > 0) { + if (enablePositionIncrements) { + mpqb.add(multiTerms.toArray(new Term[0]), position); + } else { + mpqb.add(multiTerms.toArray(new Term[0])); + } + multiTerms.clear(); + } + position += positionIncrement; + multiTerms.add(new Term(field, termAtt.getBytesRef())); + } + + if (enablePositionIncrements) { + mpqb.add(multiTerms.toArray(new Term[0]), position); + } else { + mpqb.add(multiTerms.toArray(new Term[0])); + } + return mpqb.build(); + } + + public static MultiPhrasePrefixQuery createPhrasePrefixQuery(TokenStream stream, String field, + int slop, int maxExpansions) throws IOException { + MultiPhrasePrefixQuery builder = new MultiPhrasePrefixQuery(field); + builder.setSlop(slop); + builder.setMaxExpansions(maxExpansions); + + List currentTerms = new ArrayList<>(); + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + 
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + + stream.reset(); + int position = -1; + while (stream.incrementToken()) { + if (posIncrAtt.getPositionIncrement() != 0) { + if (currentTerms.isEmpty() == false) { + builder.add(currentTerms.toArray(new Term[0]), position); + } + position += posIncrAtt.getPositionIncrement(); + currentTerms.clear(); + } + currentTerms.add(new Term(field, termAtt.getBytesRef())); + } + builder.add(currentTerms.toArray(new Term[0]), position); + return builder; + } } diff --git a/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java index 22fca7d1d0b8f..49e5e53e1ed91 100644 --- a/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java @@ -18,31 +18,19 @@ */ package org.elasticsearch.index.query; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.TermStates; import org.apache.lucene.queries.SpanMatchNoDocsQuery; -import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.spans.FieldMaskingSpanQuery; -import org.apache.lucene.search.ScoringRewrite; import org.apache.lucene.search.TopTermsRewrite; -import org.apache.lucene.search.spans.SpanBoostQuery; import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; -import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanTermQuery; 
-import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.search.SpanBooleanQueryRewriteWithMaxClause; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; @@ -50,8 +38,6 @@ import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.Objects; /** @@ -138,126 +124,53 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost); } - static class TopTermSpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod { - private final long maxExpansions; - - TopTermSpanBooleanQueryRewriteWithMaxClause() { - this.maxExpansions = BooleanQuery.getMaxClauseCount(); - } - - @Override - public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException { - final MultiTermQuery.RewriteMethod delegate = new ScoringRewrite>() { - @Override - protected List getTopLevelBuilder() { - return new ArrayList(); - } - - @Override - protected Query build(List builder) { - return new SpanOrQuery((SpanQuery[]) builder.toArray(new SpanQuery[builder.size()])); - } - - @Override - protected void checkMaxClauseCount(int count) { - if (count > maxExpansions) { - throw new RuntimeException("[" + query.toString() + " ] " + - "exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]"); - } - } - - @Override - protected void addClause(List topLevel, Term term, int docCount, float boost, TermStates states) { - SpanTermQuery q = new SpanTermQuery(term, states); - 
topLevel.add(q); - } - }; - return (SpanQuery) delegate.rewrite(reader, query); - } - } - @Override protected Query doToQuery(QueryShardContext context) throws IOException { - Query subQuery = multiTermQueryBuilder.toQuery(context); - float boost = AbstractQueryBuilder.DEFAULT_BOOST; - while (true) { - if (subQuery instanceof ConstantScoreQuery) { - subQuery = ((ConstantScoreQuery) subQuery).getQuery(); - boost = 1; - } else if (subQuery instanceof BoostQuery) { - BoostQuery boostQuery = (BoostQuery) subQuery; - subQuery = boostQuery.getQuery(); - boost *= boostQuery.getBoost(); - } else { - break; - } - } - // no MultiTermQuery extends SpanQuery, so SpanBoostQuery is not supported here - assert subQuery instanceof SpanBoostQuery == false; - - if (subQuery instanceof MatchNoDocsQuery) { - return new SpanMatchNoDocsQuery(multiTermQueryBuilder.fieldName(), subQuery.toString()); - } - - final SpanQuery spanQuery; - if (subQuery instanceof TermQuery) { - /** - * Text fields that index prefixes can rewrite prefix queries - * into term queries. See {@link TextFieldMapper.TextFieldType#prefixQuery}. - */ - if (multiTermQueryBuilder.getClass() != PrefixQueryBuilder.class) { - throw new UnsupportedOperationException("unsupported inner query generated by " + - multiTermQueryBuilder.getClass().getName() + ", should be " + MultiTermQuery.class.getName() - + " but was " + subQuery.getClass().getName()); - } - + if (multiTermQueryBuilder instanceof PrefixQueryBuilder) { PrefixQueryBuilder prefixBuilder = (PrefixQueryBuilder) multiTermQueryBuilder; - MappedFieldType fieldType = context.fieldMapper(prefixBuilder.fieldName()); - String fieldName = fieldType != null ? fieldType.name() : prefixBuilder.fieldName(); - - if (context.getIndexSettings().getIndexVersionCreated().before(Version.V_6_4_0)) { - /** - * Indices created in this version do not index positions on the prefix field - * so we cannot use it to match positional queries. 
Instead, we explicitly create the prefix - * query on the main field to avoid the rewrite. - */ - PrefixQuery prefixQuery = new PrefixQuery(new Term(fieldName, prefixBuilder.value())); - if (prefixBuilder.rewrite() != null) { - MultiTermQuery.RewriteMethod rewriteMethod = - QueryParsers.parseRewriteMethod(prefixBuilder.rewrite(), null, LoggingDeprecationHandler.INSTANCE); - prefixQuery.setRewriteMethod(rewriteMethod); + MappedFieldType fieldType = context.fieldMapper(multiTermQueryBuilder.fieldName()); + if (fieldType == null) { + return new SpanMatchNoDocsQuery(multiTermQueryBuilder.fieldName(), "unknown field"); + } + final SpanMultiTermQueryWrapper.SpanRewriteMethod spanRewriteMethod; + if (prefixBuilder.rewrite() != null) { + MultiTermQuery.RewriteMethod rewriteMethod = + QueryParsers.parseRewriteMethod(prefixBuilder.rewrite(), null, LoggingDeprecationHandler.INSTANCE); + if (rewriteMethod instanceof TopTermsRewrite) { + TopTermsRewrite innerRewrite = (TopTermsRewrite) rewriteMethod; + spanRewriteMethod = new SpanMultiTermQueryWrapper.TopTermsSpanBooleanQueryRewrite(innerRewrite.getSize()); + } else { + spanRewriteMethod = new SpanBooleanQueryRewriteWithMaxClause(); } - subQuery = prefixQuery; - spanQuery = new SpanMultiTermQueryWrapper<>(prefixQuery); } else { - /** - * Prefixes are indexed in a different field so we mask the term query with the original field - * name. This is required because span_near and span_or queries don't work across different field. - * The masking is safe because the prefix field is indexed using the same content than the original field - * and the prefix analyzer preserves positions. 
- */ - SpanTermQuery spanTermQuery = new SpanTermQuery(((TermQuery) subQuery).getTerm()); - spanQuery = new FieldMaskingSpanQuery(spanTermQuery, fieldName); + spanRewriteMethod = new SpanBooleanQueryRewriteWithMaxClause(); } + return fieldType.spanPrefixQuery(prefixBuilder.value(), spanRewriteMethod, context); } else { - if (subQuery instanceof MultiTermQuery == false) { + Query subQuery = multiTermQueryBuilder.toQuery(context); + while (true) { + if (subQuery instanceof ConstantScoreQuery) { + subQuery = ((ConstantScoreQuery) subQuery).getQuery(); + } else if (subQuery instanceof BoostQuery) { + BoostQuery boostQuery = (BoostQuery) subQuery; + subQuery = boostQuery.getQuery(); + } else { + break; + } + } + if (subQuery instanceof MatchNoDocsQuery) { + return new SpanMatchNoDocsQuery(multiTermQueryBuilder.fieldName(), subQuery.toString()); + } else if (subQuery instanceof MultiTermQuery == false) { throw new UnsupportedOperationException("unsupported inner query, should be " + MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName()); } - spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery); - } - if (subQuery instanceof MultiTermQuery) { MultiTermQuery multiTermQuery = (MultiTermQuery) subQuery; - SpanMultiTermQueryWrapper wrapper = (SpanMultiTermQueryWrapper) spanQuery; + SpanMultiTermQueryWrapper wrapper = new SpanMultiTermQueryWrapper<>(multiTermQuery); if (multiTermQuery.getRewriteMethod() instanceof TopTermsRewrite == false) { - wrapper.setRewriteMethod(new TopTermSpanBooleanQueryRewriteWithMaxClause()); + wrapper.setRewriteMethod(new SpanBooleanQueryRewriteWithMaxClause()); } + return wrapper; } - if (boost != AbstractQueryBuilder.DEFAULT_BOOST) { - return new SpanBoostQuery(spanQuery, boost); - } - - return spanQuery; } @Override diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 267f3a6951161..ad4b267eef643 100644 
--- a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -20,43 +20,46 @@ package org.elasticsearch.index.search; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.queries.ExtendedCommonTermsQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.QueryBuilder; +import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import 
org.elasticsearch.common.lucene.Lucene; -import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.lucene.search.SpanBooleanQueryRewriteWithMaxClause; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.function.Supplier; import static org.elasticsearch.common.lucene.search.Queries.newLenientFieldQuery; import static org.elasticsearch.common.lucene.search.Queries.newUnmappedFieldQuery; @@ -128,19 +131,10 @@ public void writeTo(StreamOutput out) throws IOException { } } - /** - * the default phrase slop - */ public static final int DEFAULT_PHRASE_SLOP = 0; - /** - * the default leniency setting - */ public static final boolean DEFAULT_LENIENCY = false; - /** - * the default zero terms query - */ public static final ZeroTermsQuery DEFAULT_ZERO_TERMS_QUERY = ZeroTermsQuery.NONE; protected final QueryShardContext context; @@ -159,6 +153,9 @@ public void writeTo(StreamOutput out) throws IOException { protected int maxExpansions = FuzzyQuery.defaultMaxExpansions; + protected SpanMultiTermQueryWrapper.SpanRewriteMethod spanRewriteMethod = + new SpanBooleanQueryRewriteWithMaxClause(FuzzyQuery.defaultMaxExpansions, false); + protected boolean transpositions = FuzzyQuery.defaultTranspositions; protected MultiTermQuery.RewriteMethod fuzzyRewriteMethod; @@ -212,6 +209,7 @@ public void setFuzzyPrefixLength(int fuzzyPrefixLength) { public void setMaxExpansions(int maxExpansions) { this.maxExpansions = maxExpansions; + this.spanRewriteMethod = new SpanBooleanQueryRewriteWithMaxClause(maxExpansions, false); } public void setTranspositions(boolean transpositions) { @@ -234,78 +232,83 @@ public 
void setAutoGenerateSynonymsPhraseQuery(boolean enabled) { this.autoGenerateSynonymsPhraseQuery = enabled; } - protected Analyzer getAnalyzer(MappedFieldType fieldType, boolean quoted) { - if (analyzer == null) { - return quoted ? context.getSearchQuoteAnalyzer(fieldType) : context.getSearchAnalyzer(fieldType); - } else { - return analyzer; - } - } - - private boolean hasPositions(MappedFieldType fieldType) { - return fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; - } - public Query parse(Type type, String fieldName, Object value) throws IOException { - MappedFieldType fieldType = context.fieldMapper(fieldName); + final MappedFieldType fieldType = context.fieldMapper(fieldName); if (fieldType == null) { return newUnmappedFieldQuery(fieldName); } - final String field = fieldType.name(); - - Analyzer analyzer = getAnalyzer(fieldType, type == Type.PHRASE); + Analyzer analyzer = getAnalyzer(fieldType, type == Type.PHRASE || type == Type.PHRASE_PREFIX); assert analyzer != null; + MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType); + /* * If a keyword analyzer is used, we know that further analysis isn't * needed and can immediately return a term query. 
*/ - if (analyzer == Lucene.KEYWORD_ANALYZER) { - return blendTermQuery(new Term(fieldName, value.toString()), fieldType); + if (analyzer == Lucene.KEYWORD_ANALYZER + && type != Type.PHRASE_PREFIX) { + return builder.newTermQuery(new Term(fieldName, value.toString())); } - MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType); - builder.setEnablePositionIncrements(this.enablePositionIncrements); - if (hasPositions(fieldType)) { - builder.setAutoGenerateMultiTermSynonymsPhraseQuery(this.autoGenerateSynonymsPhraseQuery); - } else { - builder.setAutoGenerateMultiTermSynonymsPhraseQuery(false); - } + return parseInternal(type, fieldName, builder, value); + } - Query query = null; + protected final Query parseInternal(Type type, String fieldName, MatchQueryBuilder builder, Object value) throws IOException { + final Query query; switch (type) { case BOOLEAN: if (commonTermsCutoff == null) { - query = builder.createBooleanQuery(field, value.toString(), occur); + query = builder.createBooleanQuery(fieldName, value.toString(), occur); } else { - query = builder.createCommonTermsQuery(field, value.toString(), occur, occur, commonTermsCutoff); + query = createCommonTermsQuery(builder, fieldName, value.toString(), occur, occur, commonTermsCutoff); } break; + case PHRASE: - query = builder.createPhraseQuery(field, value.toString(), phraseSlop); + query = builder.createPhraseQuery(fieldName, value.toString(), phraseSlop); break; + case PHRASE_PREFIX: - query = builder.createPhrasePrefixQuery(field, value.toString(), phraseSlop, maxExpansions); + query = builder.createPhrasePrefixQuery(fieldName, value.toString(), phraseSlop); break; + default: throw new IllegalStateException("No type found for [" + type + "]"); } - if (query == null) { - return zeroTermsQuery(); - } else { - return query; + return query == null ? 
zeroTermsQuery() : query; + } + + private Query createCommonTermsQuery(MatchQueryBuilder builder, String field, String queryText, + Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) { + Query booleanQuery = builder.createBooleanQuery(field, queryText, lowFreqOccur); + if (booleanQuery != null && booleanQuery instanceof BooleanQuery) { + BooleanQuery bq = (BooleanQuery) booleanQuery; + return boolToExtendedCommonTermsQuery(bq, highFreqOccur, lowFreqOccur, maxTermFrequency); } + return booleanQuery; } - protected final Query termQuery(MappedFieldType fieldType, BytesRef value, boolean lenient) { - try { - return fieldType.termQuery(value, context); - } catch (RuntimeException e) { - if (lenient) { - return newLenientFieldQuery(fieldType.name(), e); + private Query boolToExtendedCommonTermsQuery(BooleanQuery bq, + Occur highFreqOccur, + Occur lowFreqOccur, + float maxTermFrequency) { + ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency); + for (BooleanClause clause : bq.clauses()) { + if ((clause.getQuery() instanceof TermQuery) == false) { + return bq; } - throw e; + query.add(((TermQuery) clause.getQuery()).getTerm()); + } + return query; + } + + protected Analyzer getAnalyzer(MappedFieldType fieldType, boolean quoted) { + if (analyzer == null) { + return quoted ? context.getSearchQuoteAnalyzer(fieldType) : context.getSearchAnalyzer(fieldType); + } else { + return analyzer; } } @@ -322,216 +325,345 @@ protected Query zeroTermsQuery() { } } - private class MatchQueryBuilder extends QueryBuilder { + private boolean hasPositions(MappedFieldType fieldType) { + return fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + } - private final MappedFieldType mapper; + class MatchQueryBuilder extends QueryBuilder { + private final MappedFieldType fieldType; /** * Creates a new QueryBuilder using the given analyzer. 
*/ - MatchQueryBuilder(Analyzer analyzer, MappedFieldType mapper) { + MatchQueryBuilder(Analyzer analyzer, MappedFieldType fieldType) { super(analyzer); - this.mapper = mapper; + this.fieldType = fieldType; + if (hasPositions(fieldType)) { + setAutoGenerateMultiTermSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery); + } else { + setAutoGenerateMultiTermSynonymsPhraseQuery(false); + } + setEnablePositionIncrements(enablePositionIncrements); } @Override - protected Query newTermQuery(Term term) { - return blendTermQuery(term, mapper); + protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, + String queryText, boolean quoted, int slop) { + assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; + Type type = quoted ? Type.PHRASE : Type.BOOLEAN; + return createQuery(field, queryText, type, operator, slop); } - @Override - protected Query newSynonymQuery(Term[] terms) { - return blendTermsQuery(terms, mapper); + public Query createPhrasePrefixQuery(String field, String queryText, int slop) { + return createQuery(field, queryText, Type.PHRASE_PREFIX, occur, slop); } - @Override - protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { - try { - checkForPositions(field); - Query query = mapper.phraseQuery(field, stream, slop, enablePositionIncrements); - if (query instanceof PhraseQuery) { - // synonyms that expand to multiple terms can return a phrase query. - return blendPhraseQuery((PhraseQuery) query, mapper); - } - return query; - } catch (IllegalArgumentException | IllegalStateException e) { - if (lenient) { - return newLenientFieldQuery(field, e); + private Query createFieldQuery(TokenStream source, Type type, BooleanClause.Occur operator, String field, int phraseSlop) { + assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; + + // Build an appropriate query based on the analysis chain. 
+ try (CachingTokenFilter stream = new CachingTokenFilter(source)) { + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); + PositionLengthAttribute posLenAtt = stream.addAttribute(PositionLengthAttribute.class); + + if (termAtt == null) { + return null; } - throw e; - } - } - @Override - protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { - try { - checkForPositions(field); - return mapper.multiPhraseQuery(field, stream, slop, enablePositionIncrements); - } catch (IllegalArgumentException | IllegalStateException e) { - if (lenient) { - return newLenientFieldQuery(field, e); + // phase 1: read through the stream and assess the situation: + // counting the number of tokens/positions and marking if we have any synonyms. + + int numTokens = 0; + int positionCount = 0; + boolean hasSynonyms = false; + boolean isGraph = false; + + stream.reset(); + while (stream.incrementToken()) { + numTokens++; + int positionIncrement = posIncAtt.getPositionIncrement(); + if (positionIncrement != 0) { + positionCount += positionIncrement; + } else { + hasSynonyms = true; + } + + int positionLength = posLenAtt.getPositionLength(); + if (enableGraphQueries && positionLength > 1) { + isGraph = true; + } } - throw e; - } - } - private void checkForPositions(String field) { - if (hasPositions(mapper) == false) { - throw new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery"); + // phase 2: based on token count, presence of synonyms, and options + // formulate a single term, boolean, or phrase. 
+ if (numTokens == 0) { + return null; + } else if (numTokens == 1) { + // single term + if (type == Type.PHRASE_PREFIX) { + return analyzePhrasePrefix(field, stream, phraseSlop, positionCount); + } else { + return analyzeTerm(field, stream); + } + } else if (isGraph) { + // graph + if (type == Type.PHRASE || type == Type.PHRASE_PREFIX) { + return analyzeGraphPhrase(stream, field, type, phraseSlop); + } else { + return analyzeGraphBoolean(field, stream, operator); + } + } else if (type == Type.PHRASE && positionCount > 1) { + // phrase + if (hasSynonyms) { + // complex phrase with synonyms + return analyzeMultiPhrase(field, stream, phraseSlop); + } else { + // simple phrase + return analyzePhrase(field, stream, phraseSlop); + } + } else if (type == Type.PHRASE_PREFIX) { + // phrase prefix + return analyzePhrasePrefix(field, stream, phraseSlop, positionCount); + } else { + // boolean + if (positionCount == 1) { + // only one position, with synonyms + return analyzeBoolean(field, stream); + } else { + // complex case: multiple positions + return analyzeMultiBoolean(field, stream, operator); + } + } + } catch (IOException e) { + throw new RuntimeException("Error analyzing query text", e); } } - /** - * Checks if graph analysis should be enabled for the field depending - * on the provided {@link Analyzer} - */ - @Override - protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, - String queryText, boolean quoted, int phraseSlop) { - assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; - + private Query createQuery(String field, String queryText, Type type, BooleanClause.Occur operator, int phraseSlop) { // Use the analyzer to get all the tokens, and then build an appropriate // query based on the analysis chain. 
try (TokenStream source = analyzer.tokenStream(field, queryText)) { if (source.hasAttribute(DisableGraphAttribute.class)) { /* - A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid - paths explosion. See {@link org.elasticsearch.index.analysis.ShingleTokenFilterFactory} for details. + * A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid + * paths explosion. See {@link org.elasticsearch.index.analysis.ShingleTokenFilterFactory} for details. */ setEnableGraphQueries(false); } - Query query = super.createFieldQuery(source, operator, field, quoted, phraseSlop); - setEnableGraphQueries(true); - return query; + try { + return createFieldQuery(source, type, operator, field, phraseSlop); + } finally { + setEnableGraphQueries(true); + } } catch (IOException e) { throw new RuntimeException("Error analyzing query text", e); } } - public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) { - final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop); - return toMultiPhrasePrefix(query, phraseSlop, maxExpansions); + private SpanQuery newSpanQuery(Term[] terms, boolean prefix) { + if (terms.length == 1) { + return prefix ? fieldType.spanPrefixQuery(terms[0].text(), spanRewriteMethod, context) : new SpanTermQuery(terms[0]); + } + SpanQuery[] spanQueries = new SpanQuery[terms.length]; + for (int i = 0; i < terms.length; i++) { + spanQueries[i] = prefix ? 
fieldType.spanPrefixQuery(terms[i].text(), spanRewriteMethod, context) : + new SpanTermQuery(terms[i]); /* prefix => expand every alternative term, same choice as the single-term branch */ + } + return new SpanOrQuery(spanQueries); } - private Query toMultiPhrasePrefix(final Query query, int phraseSlop, int maxExpansions) { - float boost = 1; - Query innerQuery = query; - while (innerQuery instanceof BoostQuery) { - BoostQuery bq = (BoostQuery) innerQuery; - boost *= bq.getBoost(); - innerQuery = bq.getQuery(); - } - if (query instanceof SpanQuery) { - return toSpanQueryPrefix((SpanQuery) query, boost); + @Override + protected SpanQuery createSpanQuery(TokenStream in, String field) throws IOException { + return createSpanQuery(in, field, false); + } + + private SpanQuery createSpanQuery(TokenStream in, String field, boolean prefix) throws IOException { + TermToBytesRefAttribute termAtt = in.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncAtt = in.getAttribute(PositionIncrementAttribute.class); + if (termAtt == null) { + return null; } - final MultiPhrasePrefixQuery prefixQuery = new MultiPhrasePrefixQuery(); - prefixQuery.setMaxExpansions(maxExpansions); - prefixQuery.setSlop(phraseSlop); - if (innerQuery instanceof PhraseQuery) { - PhraseQuery pq = (PhraseQuery) innerQuery; - Term[] terms = pq.getTerms(); - int[] positions = pq.getPositions(); - for (int i = 0; i < terms.length; i++) { - prefixQuery.add(new Term[]{terms[i]}, positions[i]); + + SpanNearQuery.Builder builder = new SpanNearQuery.Builder(field, true); + Term lastTerm = null; + while (in.incrementToken()) { + if (posIncAtt.getPositionIncrement() > 1) { + builder.addGap(posIncAtt.getPositionIncrement()-1); } - return boost == 1 ?
prefixQuery : new BoostQuery(prefixQuery, boost); - } else if (innerQuery instanceof MultiPhraseQuery) { - MultiPhraseQuery pq = (MultiPhraseQuery) innerQuery; - Term[][] terms = pq.getTermArrays(); - int[] positions = pq.getPositions(); - for (int i = 0; i < terms.length; i++) { - prefixQuery.add(terms[i], positions[i]); + if (lastTerm != null) { + builder.addClause(new SpanTermQuery(lastTerm)); } - return boost == 1 ? prefixQuery : new BoostQuery(prefixQuery, boost); - } else if (innerQuery instanceof TermQuery) { - prefixQuery.add(((TermQuery) innerQuery).getTerm()); - return boost == 1 ? prefixQuery : new BoostQuery(prefixQuery, boost); + lastTerm = new Term(field, termAtt.getBytesRef()); + } + if (lastTerm != null) { + SpanQuery spanQuery = prefix ? + fieldType.spanPrefixQuery(lastTerm.text(), spanRewriteMethod, context) : new SpanTermQuery(lastTerm); + builder.addClause(spanQuery); + } + SpanNearQuery query = builder.build(); + SpanQuery[] clauses = query.getClauses(); + if (clauses.length == 1) { + return clauses[0]; + } else { + return query; } - return query; } - private Query toSpanQueryPrefix(SpanQuery query, float boost) { - if (query instanceof SpanTermQuery) { - SpanMultiTermQueryWrapper ret = - new SpanMultiTermQueryWrapper<>(new PrefixQuery(((SpanTermQuery) query).getTerm())); - return boost == 1 ? ret : new BoostQuery(ret, boost); - } else if (query instanceof SpanNearQuery) { - SpanNearQuery spanNearQuery = (SpanNearQuery) query; - SpanQuery[] clauses = spanNearQuery.getClauses(); - if (clauses[clauses.length - 1] instanceof SpanTermQuery) { - clauses[clauses.length - 1] = new SpanMultiTermQueryWrapper<>( - new PrefixQuery(((SpanTermQuery) clauses[clauses.length - 1]).getTerm()) - ); - } - SpanNearQuery newQuery = new SpanNearQuery(clauses, spanNearQuery.getSlop(), spanNearQuery.isInOrder()); - return boost == 1 ? 
newQuery : new BoostQuery(newQuery, boost); - } else if (query instanceof SpanOrQuery) { - SpanOrQuery orQuery = (SpanOrQuery) query; - SpanQuery[] clauses = new SpanQuery[orQuery.getClauses().length]; - for (int i = 0; i < clauses.length; i++) { - clauses[i] = (SpanQuery) toSpanQueryPrefix(orQuery.getClauses()[i], 1); - } - return boost == 1 ? new SpanOrQuery(clauses) : new BoostQuery(new SpanOrQuery(clauses), boost); + @Override + protected Query newTermQuery(Term term) { + Supplier querySupplier; + if (fuzziness != null) { + querySupplier = () -> { + Query query = fieldType.fuzzyQuery(term.text(), fuzziness, fuzzyPrefixLength, maxExpansions, transpositions); + if (query instanceof FuzzyQuery) { + QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod); + } + return query; + }; } else { + querySupplier = () -> fieldType.termQuery(term.bytes(), context); + } + try { + Query query = querySupplier.get(); return query; + } catch (RuntimeException e) { + if (lenient) { + return newLenientFieldQuery(fieldType.name(), e); + } else { + throw e; + } } } - public Query createCommonTermsQuery(String field, String queryText, - Occur highFreqOccur, - Occur lowFreqOccur, - float maxTermFrequency) { - Query booleanQuery = createBooleanQuery(field, queryText, lowFreqOccur); - if (booleanQuery != null && booleanQuery instanceof BooleanQuery) { - BooleanQuery bq = (BooleanQuery) booleanQuery; - return boolToExtendedCommonTermsQuery(bq, highFreqOccur, lowFreqOccur, maxTermFrequency); + @Override + protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { + try { + checkForPositions(field); + return fieldType.phraseQuery(stream, slop, enablePositionIncrements); + } catch (IllegalArgumentException | IllegalStateException e) { + if (lenient) { + return newLenientFieldQuery(field, e); + } + throw e; } - return booleanQuery; } - private Query boolToExtendedCommonTermsQuery(BooleanQuery bq, - Occur highFreqOccur, - Occur lowFreqOccur, - 
float maxTermFrequency) { - ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency); - for (BooleanClause clause : bq.clauses()) { - if (!(clause.getQuery() instanceof TermQuery)) { - return bq; + @Override + protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { + try { + checkForPositions(field); + return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + } catch (IllegalArgumentException | IllegalStateException e) { + if (lenient) { + return newLenientFieldQuery(field, e); } - query.add(((TermQuery) clause.getQuery()).getTerm()); + throw e; } - return query; } - } - - /** - * Called when a phrase query is built with {@link QueryBuilder#analyzePhrase(String, TokenStream, int)}. - * Subclass can override this function to blend this query to multiple fields. - */ - protected Query blendPhraseQuery(PhraseQuery query, MappedFieldType fieldType) { - return query; - } - - protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) { - return new SynonymQuery(terms); - } - protected Query blendTermQuery(Term term, MappedFieldType fieldType) { - if (fuzziness != null) { + private Query analyzePhrasePrefix(String field, TokenStream stream, int slop, int positionCount) throws IOException { try { - Query query = fieldType.fuzzyQuery(term.text(), fuzziness, fuzzyPrefixLength, maxExpansions, transpositions); - if (query instanceof FuzzyQuery) { - QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod); + if (positionCount > 1) { + checkForPositions(field); } - return query; - } catch (RuntimeException e) { + return fieldType.phrasePrefixQuery(stream, slop, maxExpansions); + } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { - return newLenientFieldQuery(fieldType.name(), e); + return newLenientFieldQuery(field, e); + } + throw e; + } + } + + private Query analyzeGraphPhrase(TokenStream source, String field, 
Type type, int slop) throws IOException { + assert type == Type.PHRASE_PREFIX || type == Type.PHRASE; + + source.reset(); + GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source); + if (slop > 0) { /* use the slop this call was given, as the nested createFieldQuery call below does */ + /* + * Creates a boolean query from the graph token stream by extracting all the finite strings from the graph + * and using them to create phrase queries with the appropriate slop. + */ + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + Iterator it = graph.getFiniteStrings(); + while (it.hasNext()) { + Query query = createFieldQuery(it.next(), type, BooleanClause.Occur.MUST, field, slop); + if (query != null) { + builder.add(query, BooleanClause.Occur.SHOULD); + } + } + return builder.build(); + } + + /* + * Creates a span near (phrase) query from a graph token stream. + * The articulation points of the graph are visited in order and the queries + * created at each point are merged in the returned near query. + */ + List clauses = new ArrayList<>(); + int[] articulationPoints = graph.articulationPoints(); + int lastState = 0; + int maxClauseCount = BooleanQuery.getMaxClauseCount(); + for (int i = 0; i <= articulationPoints.length; i++) { + int start = lastState; + int end = -1; + if (i < articulationPoints.length) { + end = articulationPoints[i]; + } + lastState = end; + final SpanQuery queryPos; + boolean endPrefix = end == -1 && type == Type.PHRASE_PREFIX; + if (graph.hasSidePath(start)) { + List queries = new ArrayList<>(); + Iterator it = graph.getFiniteStrings(start, end); + while (it.hasNext()) { + TokenStream ts = it.next(); + SpanQuery q = createSpanQuery(ts, field, endPrefix); + if (q != null) { + if (queries.size() >= maxClauseCount) { + throw new BooleanQuery.TooManyClauses(); + } + queries.add(q); + } + } + if (queries.size() > 0) { + queryPos = new SpanOrQuery(queries.toArray(new SpanQuery[0])); + } else { + queryPos = null; + } } else { - throw e; + Term[] terms = graph.getTerms(field, start); + assert
terms.length > 0; + if (terms.length >= maxClauseCount) { + throw new BooleanQuery.TooManyClauses(); + } + queryPos = newSpanQuery(terms, endPrefix); + } + + if (queryPos != null) { + if (clauses.size() >= maxClauseCount) { + throw new BooleanQuery.TooManyClauses(); + } + clauses.add(queryPos); } } + + if (clauses.isEmpty()) { + return null; + } else if (clauses.size() == 1) { + return clauses.get(0); + } else { + return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), 0, true); + } + } + + private void checkForPositions(String field) { + if (hasPositions(fieldType) == false) { + throw new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery"); + } } - return termQuery(fieldType, term.bytes(), lenient); } } diff --git a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java index 6f57faba001c9..7eefaadaadde2 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java @@ -20,12 +20,12 @@ package org.elasticsearch.index.search; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.Term; import org.apache.lucene.queries.BlendedTermQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; @@ -49,222 +49,182 @@ public class MultiMatchQuery extends MatchQuery { private Float groupTieBreaker = null; - public void setTieBreaker(float tieBreaker) { - this.groupTieBreaker = tieBreaker; - } - public MultiMatchQuery(QueryShardContext context) { super(context); } - private Query parseAndApply(Type type, 
String fieldName, Object value, - String minimumShouldMatch, Float boostValue) throws IOException { - Query query = parse(type, fieldName, value); - query = Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); - if (query != null && boostValue != null && - boostValue != AbstractQueryBuilder.DEFAULT_BOOST && query instanceof MatchNoDocsQuery == false) { - query = new BoostQuery(query, boostValue); - } - return query; + public void setTieBreaker(float tieBreaker) { + this.groupTieBreaker = tieBreaker; } public Query parse(MultiMatchQueryBuilder.Type type, Map fieldNames, - Object value, String minimumShouldMatch) throws IOException { - final Query result; - // reset query builder - queryBuilder = null; - if (fieldNames.size() == 1) { - Map.Entry fieldBoost = fieldNames.entrySet().iterator().next(); - Float boostValue = fieldBoost.getValue(); - result = parseAndApply(type.matchQueryType(), fieldBoost.getKey(), value, minimumShouldMatch, boostValue); - } else { - final float tieBreaker = groupTieBreaker == null ? type.tieBreaker() : groupTieBreaker; - switch (type) { - case PHRASE: - case PHRASE_PREFIX: - case BEST_FIELDS: - case MOST_FIELDS: - queryBuilder = new QueryBuilder(tieBreaker); - break; - case CROSS_FIELDS: - queryBuilder = new CrossFieldsQueryBuilder(tieBreaker); - break; - default: - throw new IllegalStateException("No such type: " + type); - } - final List queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch); - result = queryBuilder.combineGrouped(queries); + Object value, String minimumShouldMatch) throws IOException { + final float tieBreaker = groupTieBreaker == null ? 
type.tieBreaker() : groupTieBreaker; + final List queries; + switch (type) { + case PHRASE: + case PHRASE_PREFIX: + case BEST_FIELDS: + case MOST_FIELDS: + queries = buildFieldQueries(type, fieldNames, value, minimumShouldMatch); + break; + + case CROSS_FIELDS: + queries = buildCrossFieldQuery(type, fieldNames, value, minimumShouldMatch, tieBreaker); + break; + + default: + throw new IllegalStateException("No such type: " + type); } - return result; + return combineGrouped(queries, tieBreaker); } - private QueryBuilder queryBuilder; - - public class QueryBuilder { - protected final float tieBreaker; - - public QueryBuilder(float tieBreaker) { - this.tieBreaker = tieBreaker; - } - - public List buildGroupedQueries(MultiMatchQueryBuilder.Type type, Map fieldNames, - Object value, String minimumShouldMatch) throws IOException{ - List queries = new ArrayList<>(); - for (String fieldName : fieldNames.keySet()) { - Float boostValue = fieldNames.get(fieldName); - Query query = parseGroup(type.matchQueryType(), fieldName, boostValue, value, minimumShouldMatch); - if (query != null) { - queries.add(query); - } - } - return queries; + private Query combineGrouped(List groupQuery, float tieBreaker) { + if (groupQuery.isEmpty()) { + return zeroTermsQuery(); } - - Query parseGroup(Type type, String field, Float boostValue, Object value, String minimumShouldMatch) throws IOException { - if (context.fieldMapper(field) == null) { - return null; // indicates to the caller that this field is unmapped and should be disregarded - } - return parseAndApply(type, field, value, minimumShouldMatch, boostValue); + if (groupQuery.size() == 1) { + return groupQuery.get(0); } + return new DisjunctionMaxQuery(groupQuery, tieBreaker); + } - private Query combineGrouped(List groupQuery) { - if (groupQuery == null || groupQuery.isEmpty()) { - return zeroTermsQuery(); + private List buildFieldQueries(MultiMatchQueryBuilder.Type type, Map fieldNames, + Object value, String minimumShouldMatch) throws 
IOException{ + List queries = new ArrayList<>(); + for (String fieldName : fieldNames.keySet()) { + if (context.fieldMapper(fieldName) == null) { + // ignore unmapped fields + continue; } - if (groupQuery.size() == 1) { - return groupQuery.get(0); + Float boostValue = fieldNames.get(fieldName); + Query query = parse(type.matchQueryType(), fieldName, value); + query = Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); + if (query != null + && boostValue != null + && boostValue != AbstractQueryBuilder.DEFAULT_BOOST + && query instanceof MatchNoDocsQuery == false) { + query = new BoostQuery(query, boostValue); } - List queries = new ArrayList<>(); - for (Query query : groupQuery) { + if (query != null) { queries.add(query); } - return new DisjunctionMaxQuery(queries, tieBreaker); - } - - public Query blendTerm(Term term, MappedFieldType fieldType) { - return MultiMatchQuery.super.blendTermQuery(term, fieldType); - } - - public Query blendTerms(Term[] terms, MappedFieldType fieldType) { - return MultiMatchQuery.super.blendTermsQuery(terms, fieldType); - } - - public Query termQuery(MappedFieldType fieldType, BytesRef value) { - return MultiMatchQuery.this.termQuery(fieldType, value, lenient); - } - - public Query blendPhrase(PhraseQuery query, MappedFieldType type) { - return MultiMatchQuery.super.blendPhraseQuery(query, type); } + return queries; } - final class CrossFieldsQueryBuilder extends QueryBuilder { - private FieldAndFieldType[] blendedFields; - - CrossFieldsQueryBuilder(float tiebreaker) { - super(tiebreaker); - } - - @Override - public List buildGroupedQueries(MultiMatchQueryBuilder.Type type, Map fieldNames, - Object value, String minimumShouldMatch) throws IOException { - Map> groups = new HashMap<>(); - List queries = new ArrayList<>(); - for (Map.Entry entry : fieldNames.entrySet()) { - String name = entry.getKey(); - MappedFieldType fieldType = context.fieldMapper(name); - if (fieldType != null) { - Analyzer actualAnalyzer = 
getAnalyzer(fieldType, type == MultiMatchQueryBuilder.Type.PHRASE); - name = fieldType.name(); - if (!groups.containsKey(actualAnalyzer)) { - groups.put(actualAnalyzer, new ArrayList<>()); - } - Float boost = entry.getValue(); - boost = boost == null ? Float.valueOf(1.0f) : boost; - groups.get(actualAnalyzer).add(new FieldAndFieldType(fieldType, boost)); - } else { - queries.add(new MatchNoDocsQuery("unknown field " + name)); + private List buildCrossFieldQuery(MultiMatchQueryBuilder.Type type, Map fieldNames, + Object value, String minimumShouldMatch, float tieBreaker) throws IOException { + Map> groups = new HashMap<>(); + List queries = new ArrayList<>(); + for (Map.Entry entry : fieldNames.entrySet()) { + String name = entry.getKey(); + MappedFieldType fieldType = context.fieldMapper(name); + if (fieldType != null) { + Analyzer actualAnalyzer = getAnalyzer(fieldType, type == MultiMatchQueryBuilder.Type.PHRASE); + if (!groups.containsKey(actualAnalyzer)) { + groups.put(actualAnalyzer, new ArrayList<>()); } + float boost = entry.getValue() == null ? 1.0f : entry.getValue(); + groups.get(actualAnalyzer).add(new FieldAndBoost(fieldType, boost)); + } + } + for (Map.Entry> group : groups.entrySet()) { + final MatchQueryBuilder builder; + if (group.getValue().size() == 1) { + builder = new MatchQueryBuilder(group.getKey(), group.getValue().get(0).fieldType); + } else { + builder = new BlendedQueryBuilder(group.getKey(), group.getValue(), tieBreaker); } - for (List group : groups.values()) { - if (group.size() > 1) { - blendedFields = new FieldAndFieldType[group.size()]; - int i = 0; - for (FieldAndFieldType fieldAndFieldType : group) { - blendedFields[i++] = fieldAndFieldType; + + /* + * We have to pick some field to pass through the superclass so + * we just pick the first field. It shouldn't matter because + * fields are already grouped by their analyzers/types. 
+ */ + String representativeField = group.getValue().get(0).fieldType.name(); + Query query = parseInternal(type.matchQueryType(), representativeField, builder, value); + query = Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); + if (query != null) { + if (group.getValue().size() == 1) { + // apply the field boost to groups that contain a single field + float boost = group.getValue().get(0).boost; + if (boost != AbstractQueryBuilder.DEFAULT_BOOST) { + query = new BoostQuery(query, boost); } - } else { - blendedFields = null; - } - /* - * We have to pick some field to pass through the superclass so - * we just pick the first field. It shouldn't matter because - * fields are already grouped by their analyzers/types. - */ - String representativeField = group.get(0).fieldType.name(); - Query q = parseGroup(type.matchQueryType(), representativeField, 1f, value, minimumShouldMatch); - if (q != null) { - queries.add(q); } + queries.add(query); } + } + + return queries; + } + + private class BlendedQueryBuilder extends MatchQueryBuilder { + private final List blendedFields; + private final float tieBreaker; - return queries.isEmpty() ? 
null : queries; + BlendedQueryBuilder(Analyzer analyzer, List blendedFields, float tieBreaker) { + super(analyzer, blendedFields.get(0).fieldType); + this.blendedFields = blendedFields; + this.tieBreaker = tieBreaker; } @Override - public Query blendTerms(Term[] terms, MappedFieldType fieldType) { - if (blendedFields == null || blendedFields.length == 1) { - return super.blendTerms(terms, fieldType); - } + protected Query newSynonymQuery(Term[] terms) { BytesRef[] values = new BytesRef[terms.length]; for (int i = 0; i < terms.length; i++) { values[i] = terms[i].bytes(); } - return MultiMatchQuery.blendTerms(context, values, commonTermsCutoff, tieBreaker, lenient, blendedFields); + return blendTerms(context, values, commonTermsCutoff, tieBreaker, lenient, blendedFields); } @Override - public Query blendTerm(Term term, MappedFieldType fieldType) { - if (blendedFields == null) { - return super.blendTerm(term, fieldType); - } - return MultiMatchQuery.blendTerm(context, term.bytes(), commonTermsCutoff, tieBreaker, lenient, blendedFields); + public Query newTermQuery(Term term) { + return blendTerm(context, term.bytes(), commonTermsCutoff, tieBreaker, lenient, blendedFields); } @Override - public Query termQuery(MappedFieldType fieldType, BytesRef value) { - /* - * Use the string value of the term because we're reusing the - * portion of the query is usually after the analyzer has run on - * each term. We just skip that analyzer phase. 
- */ - return blendTerm(new Term(fieldType.name(), value.utf8ToString()), fieldType); + protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { + List disjunctions = new ArrayList<>(); + for (FieldAndBoost fieldType : blendedFields) { + Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements); + if (fieldType.boost != 1f) { + query = new BoostQuery(query, fieldType.boost); + } + disjunctions.add(query); + } + return new DisjunctionMaxQuery(disjunctions, tieBreaker); } @Override - public Query blendPhrase(PhraseQuery query, MappedFieldType type) { - if (blendedFields == null) { - return super.blendPhrase(query, type); + protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { + List disjunctions = new ArrayList<>(); + for (FieldAndBoost fieldType : blendedFields) { + Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + if (fieldType.boost != 1f) { + query = new BoostQuery(query, fieldType.boost); + } + disjunctions.add(query); } - /** - * We build phrase queries for multi-word synonyms when {@link QueryBuilder#autoGenerateSynonymsPhraseQuery} is true. - */ - return MultiMatchQuery.blendPhrase(query, tieBreaker, blendedFields); + return new DisjunctionMaxQuery(disjunctions, tieBreaker); } } static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker, - boolean lenient, FieldAndFieldType... blendedFields) { + boolean lenient, List blendedFields) { + return blendTerms(context, new BytesRef[] {value}, commonTermsCutoff, tieBreaker, lenient, blendedFields); } static Query blendTerms(QueryShardContext context, BytesRef[] values, Float commonTermsCutoff, float tieBreaker, - boolean lenient, FieldAndFieldType... 
blendedFields) { + boolean lenient, List blendedFields) { + List queries = new ArrayList<>(); - Term[] terms = new Term[blendedFields.length * values.length]; - float[] blendedBoost = new float[blendedFields.length * values.length]; + Term[] terms = new Term[blendedFields.size() * values.length]; + float[] blendedBoost = new float[blendedFields.size() * values.length]; int i = 0; - for (FieldAndFieldType ft : blendedFields) { + for (FieldAndBoost ft : blendedFields) { for (BytesRef term : values) { Query query; try { @@ -309,61 +269,15 @@ static Query blendTerms(QueryShardContext context, BytesRef[] values, Float comm // best effort: add clauses that are not term queries so that they have an opportunity to match // however their score contribution will be different // TODO: can we improve this? - return new DisjunctionMaxQuery(queries, 1.0f); - } - } - - /** - * Expand a {@link PhraseQuery} to multiple fields that share the same analyzer. - * Returns a {@link DisjunctionMaxQuery} with a disjunction for each expanded field. - */ - static Query blendPhrase(PhraseQuery query, float tiebreaker, FieldAndFieldType... 
fields) { - List disjunctions = new ArrayList<>(); - for (FieldAndFieldType field : fields) { - int[] positions = query.getPositions(); - Term[] terms = query.getTerms(); - PhraseQuery.Builder builder = new PhraseQuery.Builder(); - for (int i = 0; i < terms.length; i++) { - builder.add(new Term(field.fieldType.name(), terms[i].bytes()), positions[i]); - } - Query q = builder.build(); - if (field.boost != AbstractQueryBuilder.DEFAULT_BOOST) { - q = new BoostQuery(q, field.boost); - } - disjunctions.add(q); - } - return new DisjunctionMaxQuery(disjunctions, tiebreaker); - } - - @Override - protected Query blendTermQuery(Term term, MappedFieldType fieldType) { - if (queryBuilder == null) { - return super.blendTermQuery(term, fieldType); - } - return queryBuilder.blendTerm(term, fieldType); - } - - @Override - protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) { - if (queryBuilder == null) { - return super.blendTermsQuery(terms, fieldType); - } - return queryBuilder.blendTerms(terms, fieldType); - } - - @Override - protected Query blendPhraseQuery(PhraseQuery query, MappedFieldType fieldType) { - if (queryBuilder == null) { - return super.blendPhraseQuery(query, fieldType); + return new DisjunctionMaxQuery(queries, tieBreaker); } - return queryBuilder.blendPhrase(query, fieldType); } - static final class FieldAndFieldType { + static final class FieldAndBoost { final MappedFieldType fieldType; final float boost; - FieldAndFieldType(MappedFieldType fieldType, float boost) { + FieldAndBoost(MappedFieldType fieldType, float boost) { this.fieldType = Objects.requireNonNull(fieldType); this.boost = boost; } diff --git a/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java index a6e676006fdbf..4e4b04d1ff19c 100644 --- a/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java +++ 
b/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java @@ -126,7 +126,7 @@ public void testMultiPhrasePrefixQuerySingleTerm() throws Exception { final String[] outputs = { "The quick brown fox." }; - MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery(); + MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery("text"); query.add(new Term("text", "bro")); assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 0, outputs); @@ -139,7 +139,7 @@ public void testMultiPhrasePrefixQuery() throws Exception { final String[] outputs = { "The quick brown fox." }; - MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery(); + MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery("text"); query.add(new Term("text", "quick")); query.add(new Term("text", "brown")); query.add(new Term("text", "fo")); diff --git a/server/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java b/server/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java index 23b6939fe7a70..f0d4c88e01c19 100644 --- a/server/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java +++ b/server/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java @@ -43,24 +43,24 @@ public void testSimple() throws Exception { IndexReader reader = DirectoryReader.open(writer); IndexSearcher searcher = new IndexSearcher(reader); - MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery(); + MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery("field"); query.add(new Term("field", "aa")); assertThat(searcher.count(query), equalTo(1)); - query = new MultiPhrasePrefixQuery(); + query = new MultiPhrasePrefixQuery("field"); query.add(new Term("field", "aaa")); query.add(new Term("field", "bb")); assertThat(searcher.count(query), equalTo(1)); - query = new MultiPhrasePrefixQuery(); + 
query = new MultiPhrasePrefixQuery("field"); query.setSlop(1); query.add(new Term("field", "aaa")); query.add(new Term("field", "cc")); assertThat(searcher.count(query), equalTo(1)); - query = new MultiPhrasePrefixQuery(); + query = new MultiPhrasePrefixQuery("field"); query.setSlop(1); query.add(new Term("field", "xxx")); assertThat(searcher.count(query), equalTo(0)); } -} \ No newline at end of file +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index acd6c9ee6f80b..e527f98f73c20 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -34,13 +34,19 @@ import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.FieldMaskingSpanQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.ToXContent; @@ -52,6 +58,7 @@ import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.mapper.MapperService.MergeReason; import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; +import 
org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder; import org.elasticsearch.index.query.MatchPhraseQueryBuilder; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.search.MatchQuery; @@ -956,4 +963,125 @@ public void testIndexPrefixMapping() throws IOException { assertThat(e.getMessage(), containsString("Cannot set index_prefixes on unindexed field [field]")); } } + + public void testFastPhrasePrefixes() throws IOException { + QueryShardContext queryShardContext = indexService.newQueryShardContext( + randomInt(20), null, () -> { + throw new UnsupportedOperationException(); + }, null); + + String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "text") + .field("analyzer", "my_stop_analyzer") + .startObject("index_prefixes") + .field("min_chars", 2) + .field("max_chars", 10) + .endObject() + .endObject() + .startObject("synfield") + .field("type", "text") + .field("analyzer", "standard") // will be replaced with MockSynonymAnalyzer + .field("index_phrases", true) + .startObject("index_prefixes") + .field("min_chars", 2) + .field("max_chars", 10) + .endObject() + .endObject() + .endObject() + .endObject().endObject()); + + queryShardContext.getMapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").toQuery(queryShardContext); + Query expected = new SpanNearQuery.Builder("field", true) + .addClause(new SpanTermQuery(new Term("field", "two"))) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "words")), "field") + ) + .build(); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "three words here").toQuery(queryShardContext); + Query expected = new SpanNearQuery.Builder("field", true) + .addClause(new 
SpanTermQuery(new Term("field", "three"))) + .addClause(new SpanTermQuery(new Term("field", "words"))) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "here")), "field") + ) + .build(); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); + MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery("field"); + mpq.setSlop(1); + mpq.add(new Term("field", "two")); + mpq.add(new Term("field", "words")); + assertThat(q, equalTo(mpq)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "singleton").toQuery(queryShardContext); + assertThat(q, is(new SynonymQuery(new Term("field._index_prefix", "singleton")))); + } + + { + + Query q = new MatchPhrasePrefixQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); + Query expected = new SpanNearQuery.Builder("field", true) + .addClause(new SpanTermQuery(new Term("field", "sparkle"))) + .addGap(1) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "stopword")), "field") + ) + .build(); + assertThat(q, equalTo(expected)); + } + + { + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "motor dogs"); + Query expected = new SpanNearQuery.Builder("synfield", true) + .addClause(new SpanTermQuery(new Term("synfield", "motor"))) + .addClause( + new SpanOrQuery( + new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("synfield._index_prefix", "dogs")), "synfield" + ), + new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("synfield._index_prefix", "dog")), "synfield" + ) + ) + ) + .build(); + assertThat(q, equalTo(expected)); + } + + { + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setPhraseSlop(1); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = 
matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "two dogs"); + MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery("synfield"); + mpq.setSlop(1); + mpq.add(new Term("synfield", "two")); + mpq.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + assertThat(q, equalTo(mpq)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "motor d").toQuery(queryShardContext); + MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery("field"); + mpq.add(new Term("field", "motor")); + mpq.add(new Term("field", "d")); + assertThat(q, equalTo(mpq)); + } + } } diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java index fd722ef0c77af..a6aa53e3aa0e9 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java @@ -19,12 +19,9 @@ package org.elasticsearch.index.query; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.SynonymQuery; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.search.internal.SearchContext; @@ -34,7 +31,6 @@ import java.util.HashMap; import java.util.Map; -import static org.elasticsearch.test.AbstractBuilderTestCase.STRING_ALIAS_FIELD_NAME; import static org.hamcrest.CoreMatchers.either; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.Matchers.containsString; @@ -43,8 +39,7 @@ public class MatchPhrasePrefixQueryBuilderTests extends 
AbstractQueryTestCase { @Override protected MatchPhrasePrefixQueryBuilder doCreateTestQueryBuilder() { - String fieldName = randomFrom(STRING_FIELD_NAME, STRING_ALIAS_FIELD_NAME, BOOLEAN_FIELD_NAME, INT_FIELD_NAME, - DOUBLE_FIELD_NAME, DATE_FIELD_NAME); + String fieldName = randomFrom(STRING_FIELD_NAME, STRING_ALIAS_FIELD_NAME); Object value; if (isTextField(fieldName)) { int terms = randomIntBetween(0, 3); @@ -91,10 +86,9 @@ protected Map getAlternateVersions() { protected void doAssertLuceneQuery(MatchPhrasePrefixQueryBuilder queryBuilder, Query query, SearchContext context) throws IOException { assertThat(query, notNullValue()); - assertThat(query, - either(instanceOf(BooleanQuery.class)).or(instanceOf(MultiPhrasePrefixQuery.class)) - .or(instanceOf(TermQuery.class)).or(instanceOf(PointRangeQuery.class)) - .or(instanceOf(IndexOrDocValuesQuery.class)).or(instanceOf(MatchNoDocsQuery.class))); + assertThat(query, either(instanceOf(MultiPhrasePrefixQuery.class)) + .or(instanceOf(SynonymQuery.class)) + .or(instanceOf(MatchNoDocsQuery.class))); } public void testIllegalValues() { diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java index 184ee2759c15e..c258cce6c7c50 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java @@ -25,7 +25,6 @@ import org.apache.lucene.queries.ExtendedCommonTermsQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; @@ -371,13 +370,10 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws public void testMatchPhrasePrefixWithBoost() throws 
Exception { QueryShardContext context = createShardContext(); { - // field boost is applied on a single term query + // field boost is ignored on a single term query MatchPhrasePrefixQueryBuilder builder = new MatchPhrasePrefixQueryBuilder("string_boost", "foo"); Query query = builder.toQuery(context); - assertThat(query, instanceOf(BoostQuery.class)); - assertThat(((BoostQuery) query).getBoost(), equalTo(4f)); - Query innerQuery = ((BoostQuery) query).getQuery(); - assertThat(innerQuery, instanceOf(MultiPhrasePrefixQuery.class)); + assertThat(query, instanceOf(MultiPhrasePrefixQuery.class)); } { diff --git a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java index 43c76f028e22e..27651e0da0de4 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java @@ -91,7 +91,12 @@ protected MultiMatchQueryBuilder doCreateTestQueryBuilder() { // sets other parameters of the multi match query if (randomBoolean()) { - query.type(randomFrom(MultiMatchQueryBuilder.Type.values())); + if (fieldName.equals(STRING_FIELD_NAME)) { + query.type(randomFrom(MultiMatchQueryBuilder.Type.values())); + } else { + query.type(randomValueOtherThan(MultiMatchQueryBuilder.Type.PHRASE_PREFIX, + () -> randomFrom(MultiMatchQueryBuilder.Type.values()))); + } } if (randomBoolean()) { query.operator(randomFrom(Operator.values())); @@ -384,6 +389,11 @@ public void testDefaultField() throws Exception { ), 0.0f ); assertEquals(expected, query); + + context.getIndexSettings().updateIndexMetaData( + newIndexMeta("index", context.getIndexSettings().getSettings(), + Settings.builder().putNull("index.query.default_field").build()) + ); } public void testWithStopWords() throws Exception { diff --git 
a/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java index baa0fed01bbf0..0eb6de7da252f 100644 --- a/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java @@ -1208,20 +1208,21 @@ public void testUnmappedFieldRewriteToMatchNoDocs() throws IOException { .field("unmapped_field") .lenient(true) .toQuery(createShardContext()); - assertEquals(new MatchNoDocsQuery(""), query); + assertEquals(new BooleanQuery.Builder().build(), query); // Unmapped prefix field query = new QueryStringQueryBuilder("unmapped_field:hello") .lenient(true) .toQuery(createShardContext()); - assertEquals(new MatchNoDocsQuery(""), query); + assertEquals(new BooleanQuery.Builder().build(), query); // Unmapped fields query = new QueryStringQueryBuilder("hello") .lenient(true) .field("unmapped_field") + .field("another_field") .toQuery(createShardContext()); - assertEquals(new MatchNoDocsQuery(""), query); + assertEquals(new BooleanQuery.Builder().build(), query); } public void testDefaultField() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java index 47db7d42d8cd0..4c59e25804a55 100644 --- a/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java @@ -32,8 +32,8 @@ import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopTermsRewrite; import org.apache.lucene.search.spans.FieldMaskingSpanQuery; -import org.apache.lucene.search.spans.SpanBoostQuery; import 
org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; @@ -42,6 +42,7 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.search.SpanBooleanQueryRewriteWithMaxClause; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.search.internal.SearchContext; @@ -55,6 +56,7 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.either; +import static org.hamcrest.CoreMatchers.startsWith; public class SpanMultiTermQueryBuilderTests extends AbstractQueryTestCase { @Override @@ -68,6 +70,9 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws .field("type", "alias") .field("path", "prefix_field") .endObject() + .startObject("body") + .field("type", "text") + .endObject() .endObject().endObject().endObject(); mapperService.merge("_doc", @@ -85,23 +90,26 @@ protected void doAssertLuceneQuery(SpanMultiTermQueryBuilder queryBuilder, Query if (query instanceof SpanMatchNoDocsQuery) { return; } - if (queryBuilder.innerQuery().boost() != AbstractQueryBuilder.DEFAULT_BOOST) { - assertThat(query, instanceOf(SpanBoostQuery.class)); - SpanBoostQuery boostQuery = (SpanBoostQuery) query; - assertThat(boostQuery.getBoost(), equalTo(queryBuilder.innerQuery().boost())); - query = boostQuery.getQuery(); - } - assertThat(query, instanceOf(SpanMultiTermQueryWrapper.class)); - SpanMultiTermQueryWrapper spanMultiTermQueryWrapper = (SpanMultiTermQueryWrapper) query; - Query multiTermQuery = queryBuilder.innerQuery().toQuery(context.getQueryShardContext()); - if (queryBuilder.innerQuery().boost() != AbstractQueryBuilder.DEFAULT_BOOST) { - 
assertThat(multiTermQuery, instanceOf(BoostQuery.class)); - BoostQuery boostQuery = (BoostQuery) multiTermQuery; - multiTermQuery = boostQuery.getQuery(); + assertThat(query, either(instanceOf(SpanMultiTermQueryWrapper.class)).or(instanceOf(FieldMaskingSpanQuery.class))); + if (query instanceof SpanMultiTermQueryWrapper) { + SpanMultiTermQueryWrapper wrapper = (SpanMultiTermQueryWrapper) query; + Query innerQuery = queryBuilder.innerQuery().toQuery(context.getQueryShardContext()); + if (queryBuilder.innerQuery().boost() != AbstractQueryBuilder.DEFAULT_BOOST) { + assertThat(innerQuery, instanceOf(BoostQuery.class)); + BoostQuery boostQuery = (BoostQuery) innerQuery; + innerQuery = boostQuery.getQuery(); + } + assertThat(innerQuery, instanceOf(MultiTermQuery.class)); + MultiTermQuery multiQuery = (MultiTermQuery) innerQuery; + if (multiQuery.getRewriteMethod() instanceof TopTermsRewrite) { + assertThat(wrapper.getRewriteMethod(), instanceOf(SpanMultiTermQueryWrapper.TopTermsSpanBooleanQueryRewrite.class)); + } else { + assertThat(wrapper.getRewriteMethod(), instanceOf(SpanBooleanQueryRewriteWithMaxClause.class)); + } + } else if (query instanceof FieldMaskingSpanQuery) { + FieldMaskingSpanQuery mask = (FieldMaskingSpanQuery) query; + assertThat(mask.getMaskedQuery(), instanceOf(TermQuery.class)); } - assertThat(multiTermQuery, either(instanceOf(MultiTermQuery.class)).or(instanceOf(TermQuery.class))); - assertThat(spanMultiTermQueryWrapper.getWrappedQuery(), - equalTo(new SpanMultiTermQueryWrapper<>((MultiTermQuery) multiTermQuery).getWrappedQuery())); } public void testIllegalArgument() { @@ -168,11 +176,10 @@ public String fieldName() { */ public void testUnsupportedInnerQueryType() throws IOException { MultiTermQueryBuilder query = new TermMultiTermQueryBuilder(); - SpanMultiTermQueryBuilder spamMultiTermQuery = new SpanMultiTermQueryBuilder(query); + SpanMultiTermQueryBuilder spanMultiTermQuery = new SpanMultiTermQueryBuilder(query); UnsupportedOperationException 
e = expectThrows(UnsupportedOperationException.class, - () -> spamMultiTermQuery.toQuery(createShardContext())); - assertThat(e.getMessage(), containsString("unsupported inner query generated by " + TermMultiTermQueryBuilder.class.getName() + - ", should be " + MultiTermQuery.class.getName())); + () -> spanMultiTermQuery.toQuery(createShardContext())); + assertThat(e.getMessage(), startsWith("unsupported inner query")); } public void testToQueryInnerSpanMultiTerm() throws IOException { @@ -184,50 +191,39 @@ public void testToQueryInnerSpanMultiTerm() throws IOException { public void testToQueryInnerTermQuery() throws IOException { String fieldName = randomFrom("prefix_field", "prefix_field_alias"); final QueryShardContext context = createShardContext(); - if (context.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_4_0)) { - Query query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder(fieldName, "foo")) - .toQuery(context); - assertThat(query, instanceOf(FieldMaskingSpanQuery.class)); - FieldMaskingSpanQuery fieldSpanQuery = (FieldMaskingSpanQuery) query; - assertThat(fieldSpanQuery.getField(), equalTo("prefix_field")); - assertThat(fieldSpanQuery.getMaskedQuery(), instanceOf(SpanTermQuery.class)); - SpanTermQuery spanTermQuery = (SpanTermQuery) fieldSpanQuery.getMaskedQuery(); - assertThat(spanTermQuery.getTerm().text(), equalTo("foo")); - - query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder(fieldName, "foo")) - .boost(2.0f) - .toQuery(context); - assertThat(query, instanceOf(SpanBoostQuery.class)); - SpanBoostQuery boostQuery = (SpanBoostQuery) query; - assertThat(boostQuery.getBoost(), equalTo(2.0f)); - assertThat(boostQuery.getQuery(), instanceOf(FieldMaskingSpanQuery.class)); - fieldSpanQuery = (FieldMaskingSpanQuery) boostQuery.getQuery(); - assertThat(fieldSpanQuery.getField(), equalTo("prefix_field")); - assertThat(fieldSpanQuery.getMaskedQuery(), instanceOf(SpanTermQuery.class)); - spanTermQuery = (SpanTermQuery) 
fieldSpanQuery.getMaskedQuery(); - assertThat(spanTermQuery.getTerm().text(), equalTo("foo")); - } else { - Query query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder(fieldName, "foo")) - .toQuery(context); + { + Query query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder(fieldName, "foo")).toQuery(context); + if (context.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_4_0)) { + assertThat(query, instanceOf(FieldMaskingSpanQuery.class)); + FieldMaskingSpanQuery fieldQuery = (FieldMaskingSpanQuery) query; + assertThat(fieldQuery.getMaskedQuery(), instanceOf(SpanTermQuery.class)); + assertThat(fieldQuery.getField(), equalTo("prefix_field")); + SpanTermQuery termQuery = (SpanTermQuery) fieldQuery.getMaskedQuery(); + assertThat(termQuery.getTerm().field(), equalTo("prefix_field._index_prefix")); + assertThat(termQuery.getTerm().text(), equalTo("foo")); + } else { + assertThat(query, instanceOf(SpanMultiTermQueryWrapper.class)); + SpanMultiTermQueryWrapper wrapper = (SpanMultiTermQueryWrapper) query; + assertThat(wrapper.getWrappedQuery(), instanceOf(PrefixQuery.class)); + PrefixQuery prefixQuery = (PrefixQuery) wrapper.getWrappedQuery(); + assertThat(prefixQuery.getField(), equalTo("prefix_field")); + assertThat(prefixQuery.getPrefix().text(), equalTo("foo")); + } + } + + { + Query query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder(fieldName, "f")).toQuery(context); assertThat(query, instanceOf(SpanMultiTermQueryWrapper.class)); SpanMultiTermQueryWrapper wrapper = (SpanMultiTermQueryWrapper) query; assertThat(wrapper.getWrappedQuery(), instanceOf(PrefixQuery.class)); + assertThat(wrapper.getField(), equalTo("prefix_field")); PrefixQuery prefixQuery = (PrefixQuery) wrapper.getWrappedQuery(); assertThat(prefixQuery.getField(), equalTo("prefix_field")); - assertThat(prefixQuery.getPrefix().text(), equalTo("foo")); - - query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder(fieldName, "foo")) - .boost(2.0f) - 
.toQuery(context); - assertThat(query, instanceOf(SpanBoostQuery.class)); - SpanBoostQuery boostQuery = (SpanBoostQuery) query; - assertThat(boostQuery.getBoost(), equalTo(2.0f)); - assertThat(boostQuery.getQuery(), instanceOf(SpanMultiTermQueryWrapper.class)); - wrapper = (SpanMultiTermQueryWrapper) boostQuery.getQuery(); - assertThat(wrapper.getWrappedQuery(), instanceOf(PrefixQuery.class)); - prefixQuery = (PrefixQuery) wrapper.getWrappedQuery(); - assertThat(prefixQuery.getField(), equalTo("prefix_field")); - assertThat(prefixQuery.getPrefix().text(), equalTo("foo")); + assertThat(prefixQuery.getPrefix().text(), equalTo("f")); + assertThat(wrapper.getRewriteMethod(), instanceOf(SpanBooleanQueryRewriteWithMaxClause.class)); + SpanBooleanQueryRewriteWithMaxClause rewrite = (SpanBooleanQueryRewriteWithMaxClause) wrapper.getRewriteMethod(); + assertThat(rewrite.getMaxExpansions(), equalTo(BooleanQuery.getMaxClauseCount())); + assertTrue(rewrite.isHardLimit()); } } @@ -255,17 +251,13 @@ public void testFromJson() throws IOException { } public void testDefaultMaxRewriteBuilder() throws Exception { - Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b")). 
- toQuery(createShardContext()); - - if (query instanceof SpanBoostQuery) { - query = ((SpanBoostQuery)query).getQuery(); - } + Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("body", "b")) + .toQuery(createShardContext()); assertTrue(query instanceof SpanMultiTermQueryWrapper); if (query instanceof SpanMultiTermQueryWrapper) { - MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod(); - assertTrue(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause); + MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper) query).getRewriteMethod(); + assertTrue(rewriteMethod instanceof SpanBooleanQueryRewriteWithMaxClause); } } @@ -285,7 +277,6 @@ public void testTermExpansionExceptionOnSpanFailure() throws Exception { Query query = queryBuilder.toQuery(createShardContext(reader)); RuntimeException exc = expectThrows(RuntimeException.class, () -> query.rewrite(reader)); assertThat(exc.getMessage(), containsString("maxClauseCount")); - } finally { BooleanQuery.setMaxClauseCount(origBoolMaxClauseCount); } @@ -296,17 +287,13 @@ public void testTermExpansionExceptionOnSpanFailure() throws Exception { public void testTopNMultiTermsRewriteInsideSpan() throws Exception { Query query = QueryBuilders.spanMultiTermQueryBuilder( - QueryBuilders.prefixQuery("foo", "b").rewrite("top_terms_boost_2000") + QueryBuilders.prefixQuery("body", "b").rewrite("top_terms_boost_2000") ).toQuery(createShardContext()); - if (query instanceof SpanBoostQuery) { - query = ((SpanBoostQuery)query).getQuery(); - } - assertTrue(query instanceof SpanMultiTermQueryWrapper); if (query instanceof SpanMultiTermQueryWrapper) { MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod(); - assertFalse(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause); + assertFalse(rewriteMethod instanceof 
SpanBooleanQueryRewriteWithMaxClause); } } diff --git a/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java b/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java index 1087bbbf9fd8f..58baadd83573d 100644 --- a/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java +++ b/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java @@ -27,7 +27,6 @@ import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SynonymQuery; @@ -44,7 +43,7 @@ import org.elasticsearch.index.mapper.MockFieldMapper.FakeFieldType; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryShardContext; -import org.elasticsearch.index.search.MultiMatchQuery.FieldAndFieldType; +import org.elasticsearch.index.search.MultiMatchQuery.FieldAndBoost; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.MockKeywordPlugin; @@ -105,7 +104,8 @@ public void testCrossFieldMultiMatchQuery() throws IOException { for (float tieBreaker : new float[] {0.0f, 0.5f}) { Query parsedQuery = multiMatchQuery("banon") .field("name.first", 2) - .field("name.last", 3).field("foobar") + .field("name.last", 3) + .field("foobar") .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS) .tieBreaker(tieBreaker) .toQuery(queryShardContext); @@ -113,11 +113,7 @@ public void testCrossFieldMultiMatchQuery() throws IOException { Query rewrittenQuery = searcher.searcher().rewrite(parsedQuery); Query tq1 = new BoostQuery(new TermQuery(new Term("name.first", "banon")), 2); Query tq2 = new BoostQuery(new TermQuery(new Term("name.last", "banon")), 3); - Query expected = new 
DisjunctionMaxQuery( - Arrays.asList( - new MatchNoDocsQuery("unknown field foobar"), - new DisjunctionMaxQuery(Arrays.asList(tq2, tq1), tieBreaker) - ), tieBreaker); + Query expected = new DisjunctionMaxQuery(Arrays.asList(tq2, tq1), tieBreaker); assertEquals(expected, rewrittenQuery); } } @@ -133,7 +129,7 @@ public void testBlendTerms() { Query expected = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f); Query actual = MultiMatchQuery.blendTerm( indexService.newQueryShardContext(randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null), - new BytesRef("baz"), null, 1f, false, new FieldAndFieldType(ft1, 2), new FieldAndFieldType(ft2, 3)); + new BytesRef("baz"), null, 1f, false, Arrays.asList(new FieldAndBoost(ft1, 2), new FieldAndBoost(ft2, 3))); assertEquals(expected, actual); } @@ -149,7 +145,7 @@ public void testBlendTermsWithFieldBoosts() { Query expected = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f); Query actual = MultiMatchQuery.blendTerm( indexService.newQueryShardContext(randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null), - new BytesRef("baz"), null, 1f, false, new FieldAndFieldType(ft1, 2), new FieldAndFieldType(ft2, 3)); + new BytesRef("baz"), null, 1f, false, Arrays.asList(new FieldAndBoost(ft1, 2), new FieldAndBoost(ft2, 3))); assertEquals(expected, actual); } @@ -171,7 +167,7 @@ public Query termQuery(Object value, QueryShardContext context) { ), 1f); Query actual = MultiMatchQuery.blendTerm( indexService.newQueryShardContext(randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null), - new BytesRef("baz"), null, 1f, true, new FieldAndFieldType(ft1, 2), new FieldAndFieldType(ft2, 3)); + new BytesRef("baz"), null, 1f, true, Arrays.asList(new FieldAndBoost(ft1, 2), new FieldAndBoost(ft2, 3))); assertEquals(expected, actual); } @@ -185,7 +181,7 @@ public Query termQuery(Object value, QueryShardContext context) { ft.setName("bar"); 
expectThrows(IllegalArgumentException.class, () -> MultiMatchQuery.blendTerm( indexService.newQueryShardContext(randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null), - new BytesRef("baz"), null, 1f, false, new FieldAndFieldType(ft, 1))); + new BytesRef("baz"), null, 1f, false, Arrays.asList(new FieldAndBoost(ft, 1)))); } public void testBlendNoTermQuery() { @@ -209,7 +205,7 @@ public Query termQuery(Object value, QueryShardContext context) { ), 1.0f); Query actual = MultiMatchQuery.blendTerm( indexService.newQueryShardContext(randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null), - new BytesRef("baz"), null, 1f, false, new FieldAndFieldType(ft1, 2), new FieldAndFieldType(ft2, 3)); + new BytesRef("baz"), null, 1f, false, Arrays.asList(new FieldAndBoost(ft1, 2), new FieldAndBoost(ft2, 3))); assertEquals(expected, actual); }