diff --git a/docs/reference/query-dsl/full-text-queries.asciidoc b/docs/reference/query-dsl/full-text-queries.asciidoc index aaa0a911372c8..f9714c1be3c8e 100644 --- a/docs/reference/query-dsl/full-text-queries.asciidoc +++ b/docs/reference/query-dsl/full-text-queries.asciidoc @@ -40,6 +40,11 @@ The queries in this group are: A simpler, more robust version of the `query_string` syntax suitable for exposing directly to users. +<<query-dsl-intervals-query,`intervals` query>>:: + + A full text query that allows fine-grained control of the ordering and + proximity of matching terms. + include::match-query.asciidoc[] include::match-phrase-query.asciidoc[] @@ -53,3 +58,5 @@ include::common-terms-query.asciidoc[] include::query-string-query.asciidoc[] include::simple-query-string-query.asciidoc[] + +include::intervals-query.asciidoc[] diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc new file mode 100644 index 0000000000000..790fdf08bfdce --- /dev/null +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -0,0 +1,260 @@ +[[query-dsl-intervals-query]] +=== Intervals query + +An `intervals` query allows fine-grained control over the order and proximity of +matching terms. Matching rules are constructed from a small set of definitions, +and the rules are then applied to terms from a particular `field`. + +The definitions produce sequences of minimal intervals that span terms in a +body of text. These intervals can be further combined and filtered by +parent sources. 
+ +The example below will search for the phrase `my favourite food` appearing +before the terms `hot` and `water` or `cold` and `porridge` in any order, in +the field `my_text`: + +[source,js] +-------------------------------------------------- +POST _search +{ + "query": { + "intervals" : { + "my_text" : { + "all_of" : { + "ordered" : true, + "intervals" : [ + { + "match" : { + "query" : "my favourite food", + "max_gaps" : 0, + "ordered" : true + } + }, + { + "any_of" : { + "intervals" : [ + { "match" : { "query" : "hot water" } }, + { "match" : { "query" : "cold porridge" } } + ] + } + } + ] + }, + "boost" : 2.0, + "_name" : "favourite_food" + } + } + } +} +-------------------------------------------------- +// CONSOLE + +In the above example, the text `my favourite food is cold porridge` would +match because the two intervals matching `my favourite food` and `cold +porridge` appear in the correct order, but the text `when it's cold my +favourite food is porridge` would not match, because the interval matching +`cold porridge` starts before the interval matching `my favourite food`. + +[[intervals-match]] +==== `match` + +The `match` rule matches analyzed text, and takes the following parameters: + +[horizontal] +`query`:: +The text to match. +`max_gaps`:: +Specify a maximum number of gaps between the terms in the text. Terms that +appear further apart than this will not match. If unspecified, or set to -1, +then there is no width restriction on the match. If set to 0 then the terms +must appear next to each other. +`ordered`:: +Whether or not the terms must appear in their specified order. Defaults to +`false` +`analyzer`:: +Which analyzer should be used to analyze terms in the `query`. By +default, the search analyzer of the top-level field will be used. +`filter`:: +An optional <<interval_filter,interval filter>> + +[[intervals-all_of]] +==== `all_of` + +`all_of` returns matches that span a combination of other rules. + +[horizontal] +`intervals`:: +An array of rules to combine. 
All rules must produce a match in a +document for the overall source to match. +`max_gaps`:: +Specify a maximum number of gaps between the rules. Combinations that match +across a distance greater than this will not match. If set to -1 or +unspecified, there is no restriction on this distance. If set to 0, then the +matches produced by the rules must all appear immediately next to each other. +`ordered`:: +Whether the intervals produced by the rules should appear in the order in +which they are specified. Defaults to `false` +`filter`:: +An optional <<interval_filter,interval filter>> + +[[intervals-any_of]] +==== `any_of` + +The `any_of` rule emits intervals produced by any of its sub-rules. + +[horizontal] +`intervals`:: +An array of rules to match +`filter`:: +An optional <<interval_filter,interval filter>> + +[[interval_filter]] +==== filters + +You can filter intervals produced by any rules by their relation to the +intervals produced by another rule. The following example will return +documents that have the words `hot` and `porridge` within 10 positions +of each other, without the word `salty` in between: + +[source,js] +-------------------------------------------------- +POST _search +{ + "query": { + "intervals" : { + "my_text" : { + "match" : { + "query" : "hot porridge", + "max_gaps" : 10, + "filter" : { + "not_containing" : { + "match" : { + "query" : "salty" + } + } + } + } + } + } + } +} +-------------------------------------------------- +// CONSOLE + +The following filters are available: +[horizontal] +`containing`:: +Produces intervals that contain an interval from the filter rule +`contained_by`:: +Produces intervals that are contained by an interval from the filter rule +`not_containing`:: +Produces intervals that do not contain an interval from the filter rule +`not_contained_by`:: +Produces intervals that are not contained by an interval from the filter rule +`not_overlapping`:: +Produces intervals that do not overlap with an interval from the filter rule + +[[interval-minimization]] +==== Minimization + +The 
intervals query always minimizes intervals, to ensure that queries can +run in linear time. This can sometimes cause surprising results, particularly +when using `max_gaps` restrictions or filters. For example, take the +following query, searching for `salty` contained within the phrase `hot +porridge`: + +[source,js] +-------------------------------------------------- +POST _search +{ + "query": { + "intervals" : { + "my_text" : { + "match" : { + "query" : "salty", + "filter" : { + "contained_by" : { + "match" : { + "query" : "hot porridge" + } + } + } + } + } + } + } +} +-------------------------------------------------- +// CONSOLE + +This query will *not* match a document containing the phrase `hot porridge is +salty porridge`, because the intervals returned by the match query for `hot +porridge` only cover the initial two terms in this document, and these do not +overlap the intervals covering `salty`. + +Another restriction to be aware of is the case of `any_of` rules that contain +sub-rules which overlap. In particular, if one of the rules is a strict +prefix of the other, then the longer rule will never be matched, which can +cause surprises when used in combination with `max_gaps`. 
Consider the +following query, searching for `the` immediately followed by `big` or `big bad`, +immediately followed by `wolf`: + +[source,js] +-------------------------------------------------- +POST _search +{ + "query": { + "intervals" : { + "my_text" : { + "all_of" : { + "intervals" : [ + { "match" : { "query" : "the" } }, + { "any_of" : { + "intervals" : [ + { "match" : { "query" : "big" } }, + { "match" : { "query" : "big bad" } } + ] } }, + { "match" : { "query" : "wolf" } } + ], + "max_gaps" : 0, + "ordered" : true + } + } + } + } +} +-------------------------------------------------- +// CONSOLE + +Counter-intuitively, this query *will not* match the document `the big bad +wolf`, because the `any_of` rule in the middle will only produce intervals +for `big` - intervals for `big bad` being longer than those for `big`, while +starting at the same position, and so being minimized away. In these cases, +it's better to rewrite the query so that all of the options are explicitly +laid out at the top level: + +[source,js] +-------------------------------------------------- +POST _search +{ + "query": { + "intervals" : { + "my_text" : { + "any_of" : { + "intervals" : [ + { "match" : { + "query" : "the big bad wolf", + "ordered" : true, + "max_gaps" : 0 } }, + { "match" : { + "query" : "the big wolf", + "ordered" : true, + "max_gaps" : 0 } } + ] + } + } + } + } +} +-------------------------------------------------- +// CONSOLE \ No newline at end of file diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml new file mode 100644 index 0000000000000..2a25055be32d0 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml @@ -0,0 +1,327 @@ +setup: + - skip: + version: " - 6.99.99" + reason: "Implemented in 7.0" + + - do: + indices.create: + index: test + body: + mappings: + test: + properties: + text: 
+ type: text + analyzer: standard + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test", "_type": "test", "_id": "1"}}' + - '{"text" : "Some like it hot, some like it cold"}' + - '{"index": {"_index": "test", "_type": "test", "_id": "2"}}' + - '{"text" : "Its cold outside, theres no kind of atmosphere"}' + - '{"index": {"_index": "test", "_type": "test", "_id": "3"}}' + - '{"text" : "Baby its cold there outside"}' + - '{"index": {"_index": "test", "_type": "test", "_id": "4"}}' + - '{"text" : "Outside it is cold and wet"}' + +--- +"Test ordered matching": + - do: + search: + index: test + body: + query: + intervals: + text: + match: + query: "cold outside" + ordered: true + - match: { hits.total.value: 2 } + +--- +"Test default unordered matching": + - do: + search: + index: test + body: + query: + intervals: + text: + match: + query: "cold outside" + - match: { hits.total.value: 3 } + +--- +"Test explicit unordered matching": + - do: + search: + index: test + body: + query: + intervals: + text: + match: + query: "cold outside" + ordered: false + - match: { hits.total.value: 3 } + +--- +"Test phrase matching": + - do: + search: + index: test + body: + query: + intervals: + text: + match: + query: "cold outside" + ordered: true + max_gaps: 0 + - match: { hits.total.value: 1 } + +--- +"Test unordered max_gaps matching": + - do: + search: + index: test + body: + query: + intervals: + text: + match: + query: "cold outside" + max_gaps: 1 + - match: { hits.total.value: 2 } + +--- +"Test ordered max_gaps matching": + - do: + search: + index: test + body: + query: + intervals: + text: + match: + query: "cold outside" + max_gaps: 0 + ordered: true + - match: { hits.total.value: 1 } + +--- +"Test ordered combination with disjunction": + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - any_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + - match: + query: "atmosphere" + ordered: true + 
- match: { hits.total.value: 1 } + +--- +"Test ordered combination with max_gaps": + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + max_gaps: 0 + ordered: true + - match: { hits.total.value: 1 } + +--- +"Test ordered combination": + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + ordered: true + - match: { hits.total.value: 2 } + +--- +"Test unordered combination": + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + max_gaps: 1 + ordered: false + - match: { hits.total.value: 2 } + +--- +"Test block combination": + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + ordered: true + max_gaps: 0 + - match: { hits.total.value: 1 } + + +--- +"Test containing": + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + ordered: false + filter: + containing: + match: + query: "is" + - match: { hits.total.value: 1 } + + +--- +"Test not containing": + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + ordered: false + filter: + not_containing: + match: + query: "is" + - match: { hits.total.value: 2 } + +--- +"Test contained_by": + - do: + search: + index: test + body: + query: + intervals: + text: + match: + query: "is" + filter: + contained_by: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + ordered: false + - match: { hits.total.value: 1 } + +--- +"Test not_contained_by": + - do: + search: + index: test + body: + query: + intervals: + text: + match: + 
query: "it" + filter: + not_contained_by: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + - match: { hits.total.value: 1 } + +--- +"Test not_overlapping": + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + ordered: true + filter: + not_overlapping: + all_of: + intervals: + - match: + query: "baby" + - match: + query: "there" + ordered: false + - match: { hits.total.value: 1 } + + diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index eaafeefa7e0dd..741b2300a4678 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -34,6 +34,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.intervals.IntervalsSource; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.Nullable; @@ -374,6 +375,14 @@ public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolea + "] which is of type [" + typeName() + "]"); } + /** + * Create an {@link IntervalsSource} to be used for proximity queries + */ + public IntervalsSource intervals(String query, int max_gaps, boolean ordered, NamedAnalyzer analyzer) throws IOException { + throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name + + "] which is of type [" + typeName() + "]"); + } + /** * An enum used to describe the relation between the range of terms in a * shard when compared with a query range @@ -465,4 +474,5 @@ public static Term extractTerm(Query termQuery) { } return ((TermQuery) termQuery).getTerm(); } + } diff --git 
a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index d0419a0e44b24..5987e167dc9ab 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -39,6 +39,7 @@ import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.intervals.IntervalsSource; import org.elasticsearch.Version; import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.settings.Settings; @@ -48,6 +49,7 @@ import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; +import org.elasticsearch.index.query.IntervalBuilder; import org.elasticsearch.index.query.QueryShardContext; import java.io.IOException; @@ -579,6 +581,15 @@ public Query existsQuery(QueryShardContext context) { } } + @Override + public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException { + if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { + throw new IllegalArgumentException("Cannot create intervals against field [" + name() + "] with no positions indexed"); + } + IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? 
searchAnalyzer() : analyzer); + return builder.analyzeText(text, maxGaps, ordered); + } + @Override public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java new file mode 100644 index 0000000000000..7f42eb137190d --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -0,0 +1,299 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchesIterator; +import org.apache.lucene.search.intervals.IntervalIterator; +import org.apache.lucene.search.intervals.Intervals; +import org.apache.lucene.search.intervals.IntervalsSource; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +/** + * Constructs an IntervalsSource based on analyzed text + */ +public class IntervalBuilder { + + private final String field; + private final Analyzer analyzer; + + public IntervalBuilder(String field, Analyzer analyzer) { + this.field = field; + this.analyzer = analyzer; + } + + public IntervalsSource analyzeText(String query, int maxGaps, boolean ordered) throws IOException { + try (TokenStream ts = analyzer.tokenStream(field, query); + CachingTokenFilter stream = new CachingTokenFilter(ts)) { + return analyzeText(stream, maxGaps, ordered); + } + } + + protected IntervalsSource analyzeText(CachingTokenFilter stream, int maxGaps, boolean ordered) throws IOException { + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); + PositionLengthAttribute posLenAtt = stream.addAttribute(PositionLengthAttribute.class); + + if 
(termAtt == null) { + return NO_INTERVALS; + } + + // phase 1: read through the stream and assess the situation: + // counting the number of tokens/positions and marking if we have any synonyms. + + int numTokens = 0; + boolean hasSynonyms = false; + boolean isGraph = false; + + stream.reset(); + while (stream.incrementToken()) { + numTokens++; + int positionIncrement = posIncAtt.getPositionIncrement(); + if (positionIncrement == 0) { + hasSynonyms = true; + } + int positionLength = posLenAtt.getPositionLength(); + if (positionLength > 1) { + isGraph = true; + } + } + + // phase 2: based on token count, presence of synonyms, and options + // formulate a single term, boolean, or phrase. + + if (numTokens == 0) { + return null; + } else if (numTokens == 1) { + // single term + return analyzeTerm(stream); + } else if (isGraph) { + // graph + return combineSources(analyzeGraph(stream), maxGaps, ordered); + } else { + // phrase + if (hasSynonyms) { + // phrase with single-term synonyms + return analyzeSynonyms(stream, maxGaps, ordered); + } else { + // simple phrase + return combineSources(analyzeTerms(stream), maxGaps, ordered); + } + } + + } + + protected IntervalsSource analyzeTerm(TokenStream ts) throws IOException { + TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); + ts.reset(); + ts.incrementToken(); + return Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef())); + } + + protected static IntervalsSource combineSources(List sources, int maxGaps, boolean ordered) { + if (sources.size() == 0) { + return NO_INTERVALS; + } + if (sources.size() == 1) { + return sources.get(0); + } + IntervalsSource[] sourcesArray = sources.toArray(new IntervalsSource[0]); + if (maxGaps == 0 && ordered) { + return Intervals.phrase(sourcesArray); + } + IntervalsSource inner = ordered ? 
Intervals.ordered(sourcesArray) : Intervals.unordered(sourcesArray); + if (maxGaps == -1) { + return inner; + } + return Intervals.maxgaps(maxGaps, inner); + } + + protected List analyzeTerms(TokenStream ts) throws IOException { + List terms = new ArrayList<>(); + TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + BytesRef term = bytesAtt.getBytesRef(); + terms.add(Intervals.term(BytesRef.deepCopyOf(term))); + } + ts.end(); + return terms; + } + + protected IntervalsSource analyzeSynonyms(TokenStream ts, int maxGaps, boolean ordered) throws IOException { + List terms = new ArrayList<>(); + List synonyms = new ArrayList<>(); + TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posAtt = ts.addAttribute(PositionIncrementAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + if (posAtt.getPositionIncrement() == 1) { + if (synonyms.size() == 1) { + terms.add(synonyms.get(0)); + } + else if (synonyms.size() > 1) { + terms.add(Intervals.or(synonyms.toArray(new IntervalsSource[0]))); + } + synonyms.clear(); + } + synonyms.add(Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef()))); + } + if (synonyms.size() == 1) { + terms.add(synonyms.get(0)); + } + else { + terms.add(Intervals.or(synonyms.toArray(new IntervalsSource[0]))); + } + return combineSources(terms, maxGaps, ordered); + } + + protected List analyzeGraph(TokenStream source) throws IOException { + source.reset(); + GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source); + + List clauses = new ArrayList<>(); + int[] articulationPoints = graph.articulationPoints(); + int lastState = 0; + int maxClauseCount = BooleanQuery.getMaxClauseCount(); + for (int i = 0; i <= articulationPoints.length; i++) { + int start = lastState; + int end = -1; + if (i < articulationPoints.length) { + end = articulationPoints[i]; + } + lastState = end; + if 
(graph.hasSidePath(start)) { + List paths = new ArrayList<>(); + Iterator it = graph.getFiniteStrings(start, end); + while (it.hasNext()) { + TokenStream ts = it.next(); + IntervalsSource phrase = combineSources(analyzeTerms(ts), 0, true); + if (paths.size() >= maxClauseCount) { + throw new BooleanQuery.TooManyClauses(); + } + paths.add(phrase); + } + if (paths.size() > 0) { + clauses.add(Intervals.or(paths.toArray(new IntervalsSource[0]))); + } + } else { + Iterator it = graph.getFiniteStrings(start, end); + TokenStream ts = it.next(); + clauses.addAll(analyzeTerms(ts)); + assert it.hasNext() == false; + } + } + return clauses; + } + + private static final IntervalsSource NO_INTERVALS = new IntervalsSource() { + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) { + return new IntervalIterator() { + @Override + public int start() { + return NO_MORE_INTERVALS; + } + + @Override + public int end() { + return NO_MORE_INTERVALS; + } + + @Override + public int gaps() { + throw new UnsupportedOperationException(); + } + + @Override + public int nextInterval() { + return NO_MORE_INTERVALS; + } + + @Override + public float matchCost() { + return 0; + } + + @Override + public int docID() { + return NO_MORE_DOCS; + } + + @Override + public int nextDoc() { + return NO_MORE_DOCS; + } + + @Override + public int advance(int target) { + return NO_MORE_DOCS; + } + + @Override + public long cost() { + return 0; + } + }; + } + + @Override + public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) { + return null; + } + + @Override + public void extractTerms(String field, Set terms) { + + } + + @Override + public int hashCode() { + return 0; + } + + @Override + public boolean equals(Object other) { + return other == this; + } + + @Override + public String toString() { + return "no_match"; + } + }; + +} diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java 
b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java new file mode 100644 index 0000000000000..a1badc38323da --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java @@ -0,0 +1,152 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.intervals.IntervalQuery; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.mapper.MappedFieldType; + +import java.io.IOException; +import java.util.Objects; + +/** + * Builder for {@link IntervalQuery} + */ +public class IntervalQueryBuilder extends AbstractQueryBuilder { + + public static final String NAME = "intervals"; + + private final String field; + private final IntervalsSourceProvider sourceProvider; + + public IntervalQueryBuilder(String field, IntervalsSourceProvider sourceProvider) { + this.field = field; + this.sourceProvider = sourceProvider; + } + + public IntervalQueryBuilder(StreamInput in) throws IOException { + super(in); + this.field = in.readString(); + this.sourceProvider = in.readNamedWriteable(IntervalsSourceProvider.class); + } + + @Override + protected void doWriteTo(StreamOutput out) throws IOException { + out.writeString(field); + out.writeNamedWriteable(sourceProvider); + } + + @Override + protected void doXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAME); + builder.field(field); + builder.startObject(); + sourceProvider.toXContent(builder, params); + printBoostAndQueryName(builder); + builder.endObject(); + builder.endObject(); + } + + public static IntervalQueryBuilder fromXContent(XContentParser parser) throws IOException { + if (parser.nextToken() != XContentParser.Token.FIELD_NAME) { + throw new ParsingException(parser.getTokenLocation(), "Expected [FIELD_NAME] but got [" + parser.currentToken() + "]"); + } + 
String field = parser.currentName(); + if (parser.nextToken() != XContentParser.Token.START_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "Expected [START_OBJECT] but got [" + parser.currentToken() + "]"); + } + String name = null; + float boost = 1; + IntervalsSourceProvider provider = null; + String providerName = null; + while (parser.nextToken() != XContentParser.Token.END_OBJECT) { + if (parser.currentToken() != XContentParser.Token.FIELD_NAME) { + throw new ParsingException(parser.getTokenLocation(), + "Expected [FIELD_NAME] but got [" + parser.currentToken() + "]"); + } + switch (parser.currentName()) { + case "_name": + parser.nextToken(); + name = parser.text(); + break; + case "boost": + parser.nextToken(); + boost = parser.floatValue(); + break; + default: + if (providerName != null) { + throw new ParsingException(parser.getTokenLocation(), + "Only one interval rule can be specified, found [" + providerName + "] and [" + parser.currentName() + "]"); + } + providerName = parser.currentName(); + provider = IntervalsSourceProvider.fromXContent(parser); + + } + } + if (parser.nextToken() != XContentParser.Token.END_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), + "Expected [END_OBJECT] but got [" + parser.currentToken() + "]"); + } + if (provider == null) { + throw new ParsingException(parser.getTokenLocation(), "Missing intervals from interval query definition"); + } + IntervalQueryBuilder builder = new IntervalQueryBuilder(field, provider); + builder.queryName(name); + builder.boost(boost); + return builder; + + } + + @Override + protected Query doToQuery(QueryShardContext context) throws IOException { + MappedFieldType fieldType = context.fieldMapper(field); + if (fieldType == null) { + // Be lenient with unmapped fields so that cross-index search will work nicely + return new MatchNoDocsQuery(); + } + if (fieldType.tokenized() == false || + 
fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { + throw new IllegalArgumentException("Cannot create IntervalQuery over field [" + field + "] with no indexed positions"); + } + return new IntervalQuery(field, sourceProvider.getSource(context, fieldType)); + } + + @Override + protected boolean doEquals(IntervalQueryBuilder other) { + return Objects.equals(field, other.field) && Objects.equals(sourceProvider, other.sourceProvider); + } + + @Override + protected int doHashCode() { + return Objects.hash(field, sourceProvider); + } + + @Override + public String getWriteableName() { + return NAME; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java new file mode 100644 index 0000000000000..79bcbe26fbc04 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -0,0 +1,478 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.search.intervals.Intervals; +import org.apache.lucene.search.intervals.IntervalsSource; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.io.stream.NamedWriteable; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.ToXContentFragment; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.MappedFieldType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.Objects; + +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; + +/** + * Factory class for {@link IntervalsSource} + * + * Built-in sources include {@link Match}, which analyzes a text string and converts it + * to a proximity source (phrase, ordered or unordered depending on how + * strict the matching should be); {@link Combine}, which allows proximity queries + * between different sub-sources; and {@link Disjunction}. 
+ */ +public abstract class IntervalsSourceProvider implements NamedWriteable, ToXContentFragment { + + public abstract IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException; + + @Override + public abstract int hashCode(); + + @Override + public abstract boolean equals(Object other); + + public static IntervalsSourceProvider fromXContent(XContentParser parser) throws IOException { + assert parser.currentToken() == XContentParser.Token.FIELD_NAME; + switch (parser.currentName()) { + case "match": + return Match.fromXContent(parser); + case "any_of": + return Disjunction.fromXContent(parser); + case "all_of": + return Combine.fromXContent(parser); + } + throw new ParsingException(parser.getTokenLocation(), + "Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of]"); + } + + private static IntervalsSourceProvider parseInnerIntervals(XContentParser parser) throws IOException { + if (parser.nextToken() != XContentParser.Token.FIELD_NAME) { + throw new ParsingException(parser.getTokenLocation(), "Expected [FIELD_NAME] but got [" + parser.currentToken() + "]"); + } + IntervalsSourceProvider isp = IntervalsSourceProvider.fromXContent(parser); + if (parser.nextToken() != XContentParser.Token.END_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "Expected [END_OBJECT] but got [" + parser.currentToken() + "]"); + } + return isp; + } + + public static class Match extends IntervalsSourceProvider { + + public static final String NAME = "match"; + + private final String query; + private final int maxGaps; + private final boolean ordered; + private final String analyzer; + private final IntervalFilter filter; + + public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter) { + this.query = query; + this.maxGaps = maxGaps; + this.ordered = ordered; + this.analyzer = analyzer; + this.filter = filter; + } + + public Match(StreamInput in) throws 
IOException { + this.query = in.readString(); + this.maxGaps = in.readVInt(); + this.ordered = in.readBoolean(); + this.analyzer = in.readOptionalString(); + this.filter = in.readOptionalWriteable(IntervalFilter::new); + } + + @Override + public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException { + NamedAnalyzer analyzer = null; + if (this.analyzer != null) { + analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer); + } + IntervalsSource source = fieldType.intervals(query, maxGaps, ordered, analyzer); + if (filter != null) { + return filter.filter(source, context, fieldType); + } + return source; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Match match = (Match) o; + return maxGaps == match.maxGaps && + ordered == match.ordered && + Objects.equals(query, match.query) && + Objects.equals(filter, match.filter) && + Objects.equals(analyzer, match.analyzer); + } + + @Override + public int hashCode() { + return Objects.hash(query, maxGaps, ordered, analyzer, filter); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(query); + out.writeVInt(maxGaps); + out.writeBoolean(ordered); + out.writeOptionalString(analyzer); + out.writeOptionalWriteable(filter); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.field(NAME); + builder.startObject(); + builder.field("query", query); + builder.field("max_gaps", maxGaps); + builder.field("ordered", ordered); + if (analyzer != null) { + builder.field("analyzer", analyzer); + } + if (filter != null) { + builder.field("filter", filter); + } + return builder.endObject(); + } + + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, + args -> { 
+ String query = (String) args[0]; + int max_gaps = (args[1] == null ? -1 : (Integer) args[1]); + boolean ordered = (args[2] != null && (boolean) args[2]); + String analyzer = (String) args[3]; + IntervalFilter filter = (IntervalFilter) args[4]; + return new Match(query, max_gaps, ordered, analyzer, filter); + }); + static { + PARSER.declareString(constructorArg(), new ParseField("query")); + PARSER.declareInt(optionalConstructorArg(), new ParseField("max_gaps")); + PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered")); + PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer")); + PARSER.declareObject(optionalConstructorArg(), (p, c) -> IntervalFilter.fromXContent(p), new ParseField("filter")); + } + + public static Match fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + } + + public static class Disjunction extends IntervalsSourceProvider { + + public static final String NAME = "any_of"; + + private final List subSources; + private final IntervalFilter filter; + + public Disjunction(List subSources, IntervalFilter filter) { + this.subSources = subSources; + this.filter = filter; + } + + public Disjunction(StreamInput in) throws IOException { + this.subSources = in.readNamedWriteableList(IntervalsSourceProvider.class); + this.filter = in.readOptionalWriteable(IntervalFilter::new); + } + + @Override + public IntervalsSource getSource(QueryShardContext ctx, MappedFieldType fieldType) throws IOException { + List sources = new ArrayList<>(); + for (IntervalsSourceProvider provider : subSources) { + sources.add(provider.getSource(ctx, fieldType)); + } + IntervalsSource source = Intervals.or(sources.toArray(new IntervalsSource[0])); + if (filter == null) { + return source; + } + return filter.filter(source, ctx, fieldType); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Disjunction that = (Disjunction) 
o; + return Objects.equals(subSources, that.subSources) && Objects.equals(filter, that.filter); // filter changes what matches + } + + @Override + public int hashCode() { + return Objects.hash(subSources, filter); // must hash the same fields equals() compares + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeNamedWriteableList(subSources); + out.writeOptionalWriteable(filter); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAME); + builder.startArray("intervals"); + for (IntervalsSourceProvider provider : subSources) { + builder.startObject(); + provider.toXContent(builder, params); + builder.endObject(); + } + builder.endArray(); + if (filter != null) { + builder.field("filter", filter); + } + return builder.endObject(); + } + + @SuppressWarnings("unchecked") + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, + args -> { + List subSources = (List)args[0]; + IntervalFilter filter = (IntervalFilter) args[1]; + return new Disjunction(subSources, filter); + }); + static { + PARSER.declareObjectArray(constructorArg(), (p, c) -> IntervalsSourceProvider.parseInnerIntervals(p), + new ParseField("intervals")); + PARSER.declareObject(optionalConstructorArg(), (p, c) -> IntervalFilter.fromXContent(p), + new ParseField("filter")); + } + + public static Disjunction fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + } + + public static class Combine extends IntervalsSourceProvider { + + public static final String NAME = "all_of"; + + private final List subSources; + private final boolean ordered; + private final int maxGaps; + private final IntervalFilter filter; + + public Combine(List subSources, boolean ordered, int maxGaps, IntervalFilter filter) { + this.subSources = subSources; + this.ordered = ordered; + this.maxGaps = maxGaps; + this.filter = filter; + } + + public Combine(StreamInput in) throws
IOException { + this.ordered = in.readBoolean(); + this.subSources = in.readNamedWriteableList(IntervalsSourceProvider.class); + this.maxGaps = in.readInt(); + this.filter = in.readOptionalWriteable(IntervalFilter::new); + } + + @Override + public IntervalsSource getSource(QueryShardContext ctx, MappedFieldType fieldType) throws IOException { + List ss = new ArrayList<>(); + for (IntervalsSourceProvider provider : subSources) { + ss.add(provider.getSource(ctx, fieldType)); + } + IntervalsSource source = IntervalBuilder.combineSources(ss, maxGaps, ordered); + if (filter != null) { + return filter.filter(source, ctx, fieldType); + } + return source; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Combine combine = (Combine) o; + return Objects.equals(subSources, combine.subSources) && + ordered == combine.ordered && maxGaps == combine.maxGaps && Objects.equals(filter, combine.filter); // filter changes what matches + } + + @Override + public int hashCode() { + return Objects.hash(subSources, ordered, maxGaps, filter); // must hash the same fields equals() compares + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeBoolean(ordered); + out.writeNamedWriteableList(subSources); + out.writeInt(maxGaps); + out.writeOptionalWriteable(filter); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAME); + builder.field("ordered", ordered); + builder.field("max_gaps", maxGaps); + builder.startArray("intervals"); + for (IntervalsSourceProvider provider : subSources) { + builder.startObject(); + provider.toXContent(builder, params); + builder.endObject(); + } + builder.endArray(); + if (filter != null) { + builder.field("filter", filter); + } + return builder.endObject(); + } + + @SuppressWarnings("unchecked") + static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, + args -> { + boolean
ordered = (args[0] != null && (boolean) args[0]); + List subSources = (List)args[1]; + Integer maxGaps = (args[2] == null ? -1 : (Integer)args[2]); + IntervalFilter filter = (IntervalFilter) args[3]; + return new Combine(subSources, ordered, maxGaps, filter); + }); + static { + PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered")); + PARSER.declareObjectArray(constructorArg(), (p, c) -> IntervalsSourceProvider.parseInnerIntervals(p), + new ParseField("intervals")); + PARSER.declareInt(optionalConstructorArg(), new ParseField("max_gaps")); + PARSER.declareObject(optionalConstructorArg(), (p, c) -> IntervalFilter.fromXContent(p), new ParseField("filter")); + } + + public static Combine fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + } + + public static class IntervalFilter implements ToXContent, Writeable { + + public static final String NAME = "filter"; + + private final String type; + private final IntervalsSourceProvider filter; + + public IntervalFilter(IntervalsSourceProvider filter, String type) { + this.filter = filter; + this.type = type.toLowerCase(Locale.ROOT); + } + + public IntervalFilter(StreamInput in) throws IOException { + this.type = in.readString(); + this.filter = in.readNamedWriteable(IntervalsSourceProvider.class); + } + + public IntervalsSource filter(IntervalsSource input, QueryShardContext context, MappedFieldType fieldType) throws IOException { + IntervalsSource filterSource = filter.getSource(context, fieldType); + switch (type) { + case "containing": + return Intervals.containing(input, filterSource); + case "contained_by": + return Intervals.containedBy(input, filterSource); + case "not_containing": + return Intervals.notContaining(input, filterSource); + case "not_contained_by": + return Intervals.notContainedBy(input, filterSource); + case "not_overlapping": + return Intervals.nonOverlapping(input, filterSource); + default: + throw new IllegalArgumentException("Unknown filter type [" 
+ type + "]"); + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + IntervalFilter that = (IntervalFilter) o; + return Objects.equals(type, that.type) && + Objects.equals(filter, that.filter); + } + + @Override + public int hashCode() { + return Objects.hash(type, filter); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(type); + out.writeNamedWriteable(filter); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(type); + builder.startObject(); + filter.toXContent(builder, params); + builder.endObject(); + builder.endObject(); + return builder; + } + + public static IntervalFilter fromXContent(XContentParser parser) throws IOException { + if (parser.nextToken() != XContentParser.Token.FIELD_NAME) { + throw new ParsingException(parser.getTokenLocation(), "Expected [FIELD_NAME] but got [" + parser.currentToken() + "]"); + } + String type = parser.currentName(); + if (parser.nextToken() != XContentParser.Token.START_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "Expected [START_OBJECT] but got [" + parser.currentToken() + "]"); + } + if (parser.nextToken() != XContentParser.Token.FIELD_NAME) { + throw new ParsingException(parser.getTokenLocation(), "Expected [FIELD_NAME] but got [" + parser.currentToken() + "]"); + } + IntervalsSourceProvider intervals = IntervalsSourceProvider.fromXContent(parser); + if (parser.nextToken() != XContentParser.Token.END_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "Expected [END_OBJECT] but got [" + parser.currentToken() + "]"); + } + if (parser.nextToken() != XContentParser.Token.END_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "Expected [END_OBJECT] but got [" + parser.currentToken() + "]"); + } + return new 
IntervalFilter(intervals, type); + } + } + +} diff --git a/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java b/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java index c8d4e2e6209d2..d07467e5d1f8f 100644 --- a/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java +++ b/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java @@ -41,13 +41,13 @@ import org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParser; -import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.MovAvgModel; +import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregator; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; -import org.elasticsearch.search.rescore.RescorerBuilder; import org.elasticsearch.search.rescore.Rescorer; +import org.elasticsearch.search.rescore.RescorerBuilder; import org.elasticsearch.search.suggest.Suggest; import org.elasticsearch.search.suggest.Suggester; import org.elasticsearch.search.suggest.SuggestionBuilder; @@ -127,7 +127,7 @@ default List getPipelineAggregations() { return emptyList(); } /** - * The next {@link Rescorer}s added by this plugin. + * The new {@link Rescorer}s added by this plugin. */ default List> getRescorers() { return emptyList(); @@ -239,6 +239,7 @@ public QuerySpec(String name, Writeable.Reader reader, QueryParser parser) super(name, reader, parser); } } + /** * Specification for an {@link Aggregation}. 
*/ diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index 66e97230636e8..2531685b94557 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -44,6 +44,8 @@ import org.elasticsearch.index.query.GeoPolygonQueryBuilder; import org.elasticsearch.index.query.GeoShapeQueryBuilder; import org.elasticsearch.index.query.IdsQueryBuilder; +import org.elasticsearch.index.query.IntervalQueryBuilder; +import org.elasticsearch.index.query.IntervalsSourceProvider; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.index.query.MatchNoneQueryBuilder; import org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder; @@ -153,60 +155,41 @@ import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.UnmappedTerms; import org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalAvg; import org.elasticsearch.search.aggregations.metrics.CardinalityAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalCardinality; +import org.elasticsearch.search.aggregations.metrics.ExtendedStatsAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.GeoBoundsAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalGeoBounds; import org.elasticsearch.search.aggregations.metrics.GeoCentroidAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.InternalAvg; +import org.elasticsearch.search.aggregations.metrics.InternalCardinality; +import org.elasticsearch.search.aggregations.metrics.InternalExtendedStats; +import org.elasticsearch.search.aggregations.metrics.InternalGeoBounds; import 
org.elasticsearch.search.aggregations.metrics.InternalGeoCentroid; +import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentileRanks; +import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentiles; import org.elasticsearch.search.aggregations.metrics.InternalMax; -import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.InternalMedianAbsoluteDeviation; import org.elasticsearch.search.aggregations.metrics.InternalMin; +import org.elasticsearch.search.aggregations.metrics.InternalScriptedMetric; +import org.elasticsearch.search.aggregations.metrics.InternalStats; +import org.elasticsearch.search.aggregations.metrics.InternalSum; +import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentileRanks; +import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentiles; +import org.elasticsearch.search.aggregations.metrics.InternalTopHits; +import org.elasticsearch.search.aggregations.metrics.InternalValueCount; +import org.elasticsearch.search.aggregations.metrics.InternalWeightedAvg; +import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.MedianAbsoluteDeviationAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.MinAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentileRanks; -import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentiles; -import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentileRanks; -import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentiles; -import org.elasticsearch.search.aggregations.metrics.InternalScriptedMetric; import 
org.elasticsearch.search.aggregations.metrics.ScriptedMetricAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalStats; import org.elasticsearch.search.aggregations.metrics.StatsAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.ExtendedStatsAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalExtendedStats; -import org.elasticsearch.search.aggregations.metrics.InternalSum; import org.elasticsearch.search.aggregations.metrics.SumAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalTopHits; import org.elasticsearch.search.aggregations.metrics.TopHitsAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalValueCount; import org.elasticsearch.search.aggregations.metrics.ValueCountAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalWeightedAvg; import org.elasticsearch.search.aggregations.metrics.WeightedAvgAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalMedianAbsoluteDeviation; -import org.elasticsearch.search.aggregations.metrics.MedianAbsoluteDeviationAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.InternalSimpleValue; -import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.InternalBucketMetricValue; import org.elasticsearch.search.aggregations.pipeline.AvgBucketPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.AvgBucketPipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.MaxBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.MaxBucketPipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.MinBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.MinBucketPipelineAggregator; -import 
org.elasticsearch.search.aggregations.pipeline.InternalPercentilesBucket; -import org.elasticsearch.search.aggregations.pipeline.PercentilesBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.PercentilesBucketPipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.InternalStatsBucket; -import org.elasticsearch.search.aggregations.pipeline.StatsBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.StatsBucketPipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.ExtendedStatsBucketParser; -import org.elasticsearch.search.aggregations.pipeline.ExtendedStatsBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.ExtendedStatsBucketPipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.InternalExtendedStatsBucket; -import org.elasticsearch.search.aggregations.pipeline.SumBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.SumBucketPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.BucketScriptPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.BucketScriptPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.BucketSelectorPipelineAggregationBuilder; @@ -217,19 +200,38 @@ import org.elasticsearch.search.aggregations.pipeline.CumulativeSumPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.DerivativePipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.DerivativePipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.InternalDerivative; -import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.EwmaModel; +import 
org.elasticsearch.search.aggregations.pipeline.ExtendedStatsBucketParser; +import org.elasticsearch.search.aggregations.pipeline.ExtendedStatsBucketPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.ExtendedStatsBucketPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.HoltLinearModel; import org.elasticsearch.search.aggregations.pipeline.HoltWintersModel; +import org.elasticsearch.search.aggregations.pipeline.InternalBucketMetricValue; +import org.elasticsearch.search.aggregations.pipeline.InternalDerivative; +import org.elasticsearch.search.aggregations.pipeline.InternalExtendedStatsBucket; +import org.elasticsearch.search.aggregations.pipeline.InternalPercentilesBucket; +import org.elasticsearch.search.aggregations.pipeline.InternalSimpleValue; +import org.elasticsearch.search.aggregations.pipeline.InternalStatsBucket; import org.elasticsearch.search.aggregations.pipeline.LinearModel; +import org.elasticsearch.search.aggregations.pipeline.MaxBucketPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.MaxBucketPipelineAggregator; +import org.elasticsearch.search.aggregations.pipeline.MinBucketPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.MinBucketPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.MovAvgModel; -import org.elasticsearch.search.aggregations.pipeline.SimpleModel; +import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.MovFnPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.MovFnPipelineAggregator; +import org.elasticsearch.search.aggregations.pipeline.PercentilesBucketPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.PercentilesBucketPipelineAggregator; +import 
org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.SerialDiffPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.SerialDiffPipelineAggregator; +import org.elasticsearch.search.aggregations.pipeline.SimpleModel; +import org.elasticsearch.search.aggregations.pipeline.StatsBucketPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.StatsBucketPipelineAggregator; +import org.elasticsearch.search.aggregations.pipeline.SumBucketPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.SumBucketPipelineAggregator; import org.elasticsearch.search.fetch.FetchPhase; import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.fetch.subphase.DocValueFieldsFetchSubPhase; @@ -311,6 +313,7 @@ public SearchModule(Settings settings, boolean transportClient, List getNamedWriteables() { @@ -803,6 +806,7 @@ private void registerQueryParsers(List plugins) { registerQuery(new QuerySpec<>(ExistsQueryBuilder.NAME, ExistsQueryBuilder::new, ExistsQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(MatchNoneQueryBuilder.NAME, MatchNoneQueryBuilder::new, MatchNoneQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(TermsSetQueryBuilder.NAME, TermsSetQueryBuilder::new, TermsSetQueryBuilder::fromXContent)); + registerQuery(new QuerySpec<>(IntervalQueryBuilder.NAME, IntervalQueryBuilder::new, IntervalQueryBuilder::fromXContent)); if (ShapesAvailability.JTS_AVAILABLE && ShapesAvailability.SPATIAL4J_AVAILABLE) { registerQuery(new QuerySpec<>(GeoShapeQueryBuilder.NAME, GeoShapeQueryBuilder::new, GeoShapeQueryBuilder::fromXContent)); @@ -811,6 +815,15 @@ private void registerQueryParsers(List plugins) { registerFromPlugin(plugins, SearchPlugin::getQueries, this::registerQuery); } + private void registerIntervalsSourceProviders() { + namedWriteables.add(new 
NamedWriteableRegistry.Entry(IntervalsSourceProvider.class, + IntervalsSourceProvider.Match.NAME, IntervalsSourceProvider.Match::new)); + namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class, + IntervalsSourceProvider.Combine.NAME, IntervalsSourceProvider.Combine::new)); + namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class, + IntervalsSourceProvider.Disjunction.NAME, IntervalsSourceProvider.Disjunction::new)); + } + private void registerQuery(QuerySpec spec) { namedWriteables.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, spec.getName().getPreferredName(), spec.getReader())); namedXContents.add(new NamedXContentRegistry.Entry(QueryBuilder.class, spec.getName(), diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java new file mode 100644 index 0000000000000..a565db41516a9 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java @@ -0,0 +1,141 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.CannedTokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.search.intervals.Intervals; +import org.apache.lucene.search.intervals.IntervalsSource; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; + +/** + * Checks that {@link IntervalBuilder#analyzeText} maps canned token streams onto the expected Lucene {@link IntervalsSource}. + */ +public class IntervalBuilderTests extends ESTestCase { + + /** Shared builder for "field1"; each test hands it a canned token stream directly. */ + private static final IntervalBuilder BUILDER = new IntervalBuilder("field1", new StandardAnalyzer()); + + /** A single token becomes a plain term source. */ + public void testSimpleTerm() throws IOException { + + CannedTokenStream ts = new CannedTokenStream(new Token("term1", 1, 2)); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); + IntervalsSource expected = Intervals.term("term1"); + + assertEquals(expected, source); + } + + /** Three consecutive tokens analyzed with arguments (-1, true) become an ordered source. */ + public void testOrdered() throws IOException { + + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + new Token("term2", 3, 4), + new Token("term3", 5, 6) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); + IntervalsSource expected = Intervals.ordered( + Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3") + ); + + assertEquals(expected, source); + + } + + /** The same three tokens analyzed with arguments (-1, false) become an unordered source. */ + public void testUnordered() throws IOException { + + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + new Token("term2", 3, 4), + new Token("term3", 5, 6) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, false); + IntervalsSource expected = Intervals.unordered( + Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3") + ); + + assertEquals(expected, source); + + } + + /** The same three tokens analyzed with arguments (0, true) become a phrase source. */ + public void testPhrase() throws IOException { + + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + new Token("term2", 3, 4), + new
Token("term3", 5, 6) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), 0, true); + IntervalsSource expected = Intervals.phrase( + Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3") + ); + + assertEquals(expected, source); + + } + + /** A token added with position increment 0 (term4) is OR-ed with the token it is stacked on (term2). */ + public void testSimpleSynonyms() throws IOException { + + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + new Token("term2", 3, 4), + new Token("term4", 0, 3, 4), + new Token("term3", 5, 6) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); + IntervalsSource expected = Intervals.ordered( + Intervals.term("term1"), Intervals.or(Intervals.term("term2"), Intervals.term("term4")), Intervals.term("term3") + ); + + assertEquals(expected, source); + + } + + /** A token with position length 2 (term2) is OR-ed with the two-token phrase stacked at the same position (term3 term4). */ + public void testGraphSynonyms() throws IOException { + + // term1 term2:2/term3 term4 term5 + + Token graphToken = new Token("term2", 3, 4); + graphToken.setPositionLength(2); + + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + graphToken, + new Token("term3", 0, 3, 4), + new Token("term4", 5, 6), + new Token("term5", 6, 7) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); + IntervalsSource expected = Intervals.ordered( + Intervals.term("term1"), + Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4")), + Intervals.term("term5") + ); + + assertEquals(expected, source); + + } + +} diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java new file mode 100644 index 0000000000000..06ab542ebc092 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -0,0 +1,280 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.intervals.IntervalQuery; +import org.apache.lucene.search.intervals.Intervals; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.test.AbstractQueryTestCase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.instanceOf; + +/** + * Tests for {@link IntervalQueryBuilder}: supplies random builders to the inherited {@link AbstractQueryTestCase} checks + * and verifies the Lucene queries produced from hand-written JSON for each interval rule. + */ +public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQueryBuilder> { + + /** Creates a builder over {@code STRING_FIELD_NAME} wrapping a randomly generated source. */ + @Override + protected IntervalQueryBuilder doCreateTestQueryBuilder() { + return new IntervalQueryBuilder(STRING_FIELD_NAME, createRandomSource()); + } + + /** NOTE(review): only delegates to the superclass test, so this override looks redundant; confirm before removing. */ + @Override + public void testUnknownField() throws IOException { + super.testUnknownField(); + } + + /** Filter type names handed to {@link IntervalsSourceProvider.IntervalFilter} by {@link #createRandomFilter()}. */ + private static final String[] filters = new String[]{ + "containing", "contained_by", "not_containing", "not_contained_by", "not_overlapping" + }; + + /** Returns a filter with a random source and filter type when {@code randomInt(20) > 18}, otherwise {@code null}. */ + private IntervalsSourceProvider.IntervalFilter createRandomFilter() { + if (randomInt(20) > 18) { + return new
IntervalsSourceProvider.IntervalFilter(createRandomSource(), randomFrom(filters)); + } + return null; + } + + /** + * Builds a random source: a Disjunction for draws 0-1 and a Combine for draws 2-3 (both recurse for their sub-sources), + * otherwise a Match over {@code randomInt(4) + 1} random words with one of the simple, keyword or whitespace analyzers. + */ + private IntervalsSourceProvider createRandomSource() { + switch (randomInt(20)) { + case 0: + case 1: + int orCount = randomInt(4) + 1; + List<IntervalsSourceProvider> orSources = new ArrayList<>(); + for (int i = 0; i < orCount; i++) { + orSources.add(createRandomSource()); + } + return new IntervalsSourceProvider.Disjunction(orSources, createRandomFilter()); + case 2: + case 3: + int count = randomInt(5) + 1; + List<IntervalsSourceProvider> subSources = new ArrayList<>(); + for (int i = 0; i < count; i++) { + subSources.add(createRandomSource()); + } + boolean ordered = randomBoolean(); + int maxGaps = randomInt(5) - 1; + IntervalsSourceProvider.IntervalFilter filter = createRandomFilter(); + return new IntervalsSourceProvider.Combine(subSources, ordered, maxGaps, filter); + default: + int wordCount = randomInt(4) + 1; + List<String> words = new ArrayList<>(); + for (int i = 0; i < wordCount; i++) { + words.add(randomRealisticUnicodeOfLengthBetween(4, 20)); + } + String text = String.join(" ", words); + boolean mOrdered = randomBoolean(); + int maxMGaps = randomInt(5) - 1; + String analyzer = randomFrom("simple", "keyword", "whitespace"); + return new IntervalsSourceProvider.Match(text, maxMGaps, mOrdered, analyzer, createRandomFilter()); + } + } + + /** Only checks that the generated Lucene query is an {@link IntervalQuery}. */ + @Override + protected void doAssertLuceneQuery(IntervalQueryBuilder queryBuilder, Query query, SearchContext context) throws IOException { + assertThat(query, instanceOf(IntervalQuery.class)); + } + + /** Parses match rules from JSON (plain, with max_gaps, ordered with boost, with an explicit analyzer, with a filter) and compares the resulting queries. */ + public void testMatchInterval() throws IOException { + + String json = "{ \"intervals\" : " + + "{ \"" + STRING_FIELD_NAME + "\" : { \"match\" : { \"query\" : \"Hello world\" } } } }"; + + IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); + Query expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.unordered(Intervals.term("hello"), Intervals.term("world"))); + + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{
\"intervals\" : " + + "{ \"" + STRING_FIELD_NAME + "\" : { " + + " \"match\" : { " + + " \"query\" : \"Hello world\"," + + " \"max_gaps\" : 40 } } } }"; + + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.maxgaps(40, Intervals.unordered(Intervals.term("hello"), Intervals.term("world")))); + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : " + + "{ \"" + STRING_FIELD_NAME + "\" : { " + + " \"match\" : { " + + " \"query\" : \"Hello world\"," + + " \"ordered\" : true }," + + " \"boost\" : 2 } } }"; + + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new BoostQuery(new IntervalQuery(STRING_FIELD_NAME, + Intervals.ordered(Intervals.term("hello"), Intervals.term("world"))), 2); + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : " + + "{ \"" + STRING_FIELD_NAME + "\" : { " + + " \"match\" : { " + + " \"query\" : \"Hello world\"," + + " \"max_gaps\" : 10," + + " \"analyzer\" : \"whitespace\"," + + " \"ordered\" : true } } } }"; + + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))); + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : " + + "{ \"" + STRING_FIELD_NAME + "\" : { " + + " \"match\" : { " + + " \"query\" : \"Hello world\"," + + " \"max_gaps\" : 10," + + " \"analyzer\" : \"whitespace\"," + + " \"ordered\" : true," + + " \"filter\" : {" + + " \"containing\" : {" + + " \"match\" : { \"query\" : \"blah\" } } } } } } }"; + + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.containing(Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))), + Intervals.term("blah"))); + assertEquals(expected,
builder.toQuery(createShardContext())); + } + + /** Parses any_of rules, without and with a not_containing filter, and compares the resulting queries. */ + public void testOrInterval() throws IOException { + + String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": {" + + " \"any_of\" : { " + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"one\" } }," + + " { \"match\" : { \"query\" : \"two\" } } ] } } } }"; + IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); + Query expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.or(Intervals.term("one"), Intervals.term("two"))); + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": {" + + " \"any_of\" : { " + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"one\" } }," + + " { \"match\" : { \"query\" : \"two\" } } ]," + + " \"filter\" : {" + + " \"not_containing\" : { \"match\" : { \"query\" : \"three\" } } } } } } }"; + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.notContaining( + Intervals.or(Intervals.term("one"), Intervals.term("two")), + Intervals.term("three"))); + assertEquals(expected, builder.toQuery(createShardContext())); + } + + /** Parses nested all_of rules with max_gaps, a contained_by filter and a boost, and compares the resulting query. */ + public void testCombineInterval() throws IOException { + + String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": {" + + " \"all_of\" : {" + + " \"ordered\" : true," + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"one\" } }," + + " { \"all_of\" : { " + + " \"ordered\" : false," + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"two\" } }," + + " { \"match\" : { \"query\" : \"three\" } } ] } } ]," + + " \"max_gaps\" : 30," + + " \"filter\" : { " + + " \"contained_by\" : { " + + " \"match\" : { " + + " \"query\" : \"SENTENCE\"," + + " \"analyzer\" : \"keyword\" } } } }," + + " \"boost\" : 1.5 } } }"; + IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); + Query expected = new BoostQuery(new IntervalQuery(STRING_FIELD_NAME, +
Intervals.containedBy( + Intervals.maxgaps(30, Intervals.ordered( + Intervals.term("one"), + Intervals.unordered(Intervals.term("two"), Intervals.term("three")))), + Intervals.term("SENTENCE"))), 1.5f); + assertEquals(expected, builder.toQuery(createShardContext())); + + } + + /** Parses an ordered all_of that contains an any_of, with a not_contained_by filter, and compares the resulting query. */ + public void testCombineDisjunctionInterval() throws IOException { + String json = "{ \"intervals\" : " + + "{ \"" + STRING_FIELD_NAME + "\": { " + + " \"all_of\" : {" + + " \"ordered\" : true," + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"atmosphere\" } }," + + " { \"any_of\" : {" + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"cold\" } }," + + " { \"match\" : { \"query\" : \"outside\" } } ] } } ]," + + " \"max_gaps\" : 30," + + " \"filter\" : { " + + " \"not_contained_by\" : { " + + " \"match\" : { \"query\" : \"freeze\" } } } } } } }"; + + IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); + Query expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.notContainedBy( + Intervals.maxgaps(30, Intervals.ordered( + Intervals.term("atmosphere"), + Intervals.or(Intervals.term("cold"), Intervals.term("outside")) + )), + Intervals.term("freeze"))); + assertEquals(expected, builder.toQuery(createShardContext())); + } + + /** + * The field "no_such_field" yields a {@link MatchNoDocsQuery}, while INT_FIELD_NAME and STRING_FIELD_NAME_2 + * are rejected with an IllegalArgumentException reporting no indexed positions. + */ + public void testNonIndexedFields() throws IOException { + IntervalsSourceProvider provider = createRandomSource(); + IntervalQueryBuilder b = new IntervalQueryBuilder("no_such_field", provider); + assertThat(b.toQuery(createShardContext()), equalTo(new MatchNoDocsQuery())); + + Exception e = expectThrows(IllegalArgumentException.class, () -> { + IntervalQueryBuilder builder = new IntervalQueryBuilder(INT_FIELD_NAME, provider); + builder.doToQuery(createShardContext()); + }); + assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field [" + INT_FIELD_NAME + "] with no indexed positions")); + + e = expectThrows(IllegalArgumentException.class, () -> { + IntervalQueryBuilder builder = new
IntervalQueryBuilder(STRING_FIELD_NAME_2, provider); + builder.doToQuery(createShardContext()); + }); + assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field [" + + STRING_FIELD_NAME_2 + "] with no indexed positions")); + } + + /** + * Two rules ([match] and [all_of]) under one field must be rejected with a ParsingException. + * NOTE(review): the JSON literal is never closed; the test appears to rely on parsing failing at [all_of] first - confirm. + */ + public void testMultipleProviders() { + String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " + + "\"boost\" : 1," + + "\"match\" : { \"query\" : \"term1\" }," + + "\"all_of\" : { \"intervals\" : [ { \"query\" : \"term2\" } ] } }"; + + ParsingException e = expectThrows(ParsingException.class, () -> { + parseQuery(json); + }); + assertThat(e.getMessage(), equalTo("Only one interval rule can be specified, found [match] and [all_of]")); + } +} diff --git a/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java b/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java index cf5b3fc0fc13b..321d50278b8be 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java +++ b/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java @@ -328,6 +328,7 @@ public List> getRescorers() { "geo_polygon", "geo_shape", "ids", + "intervals", "match", "match_all", "match_none", diff --git a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java index 2b6b8b1c60bcc..0c2b0829c5f75 100644 --- a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java +++ b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java @@ -1311,6 +1311,28 @@ public void testMustNot() throws IOException, ExecutionException, InterruptedExc assertHitCount(searchResponse, 2L); } + /** Indexes one document and expects the ordered all_of / any_of intervals query, sent through wrapperQuery, to hit it once. */ + public void testIntervals() throws InterruptedException { + createIndex("test"); + + indexRandom(true, + client().prepareIndex("test", "test", "1") + .setSource("description", "it's cold outside, there's no kind of atmosphere")); + + String json = "{ \"intervals\" : " + + "{ \"description\": { " + + "
\"all_of\" : {" + + " \"ordered\" : \"true\"," + + " \"intervals\" : [" + + " { \"any_of\" : {" + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"cold\" } }," + + " { \"match\" : { \"query\" : \"outside\" } } ] } }," + + " { \"match\" : { \"query\" : \"atmosphere\" } } ]," + + " \"max_gaps\" : 30 } } } }"; + SearchResponse response = client().prepareSearch("test").setQuery(wrapperQuery(json)).get(); + assertHitCount(response, 1L); + } + // see #2994 public void testSimpleSpan() throws IOException, ExecutionException, InterruptedException { createIndex("test");