From 1a067c1a5f9b80cef760bbf5095fcca87ef72aa0 Mon Sep 17 00:00:00 2001 From: Christophe Bismuth Date: Fri, 30 Nov 2018 16:10:13 +0100 Subject: [PATCH] Add `minimum_should_match` section to the query_string docs Closes #34142 --- .../query-dsl/query-string-query.asciidoc | 132 ++++++++++++++++-- 1 file changed, 124 insertions(+), 8 deletions(-) diff --git a/docs/reference/query-dsl/query-string-query.asciidoc b/docs/reference/query-dsl/query-string-query.asciidoc index 9cb0944a99cb5..759bc1d2eadca 100644 --- a/docs/reference/query-dsl/query-string-query.asciidoc +++ b/docs/reference/query-dsl/query-string-query.asciidoc @@ -28,14 +28,14 @@ GET /_search "query": { "query_string" : { "default_field" : "content", - "query" : "(new york city) OR (big apple)" + "query" : "(new york city) OR (big apple)" <1> } } } -------------------------------------------------- // CONSOLE -... will be split into `new york city` and `big apple` and each part is then +<1> The query string will be split into `new york city` and `big apple` and each part is then analyzed independently by the analyzer configured for the field. WARNING: Whitespaces are not considered operators, this means that `new york city` @@ -48,7 +48,6 @@ When multiple fields are provided it is also possible to modify how the differen field queries are combined inside each textual part using the `type` parameter. The possible modes are described <> and the default is `best_fields`. - The `query_string` top level parameters include: [cols="<,<",options="header",] @@ -109,8 +108,8 @@ not analyzed. By setting this value to `true`, a best effort will be made to analyze those as well. |`max_determinized_states` |Limit on how many automaton states regexp -queries are allowed to create. This protects against too-difficult -(e.g. exponentially hard) regexps. Defaults to 10000. +queries are allowed to create. This protects against too-difficult +(e.g. exponentially hard) regexps. Defaults to 10000. 
|`minimum_should_match` |A value controlling how many "should" clauses in the resulting boolean query should match. It can be an absolute value @@ -159,7 +158,7 @@ include nested documents, use a nested query to search those documents. ==== Multi Field The `query_string` query can also run against multiple fields. Fields can be -provided via the `"fields"` parameter (example below). +provided via the `fields` parameter (example below). The idea of running the `query_string` query against multiple fields is to expand each query term to an OR clause like this: @@ -199,7 +198,7 @@ GET /_search // CONSOLE Since several queries are generated from the individual search terms, -combining them is automatically done using a `dis_max` query with a tie_breaker. +combining them is automatically done using a `dis_max` query with a `tie_breaker`. For example (the `name` is boosted by 5 using `^5` notation): [source,js] @@ -294,7 +293,7 @@ GET /_search The `query_string` query supports multi-terms synonym expansion with the <> token filter. When this filter is used, the parser creates a phrase query for each multi-terms synonyms. -For example, the following synonym: `"ny, new york" would produce:` +For example, the following synonym: `ny, new york` would produce: `(ny OR ("new york"))` @@ -322,4 +321,121 @@ The example above creates a boolean query: that matches documents with the term `ny` or the conjunction `new AND york`. By default the parameter `auto_generate_synonyms_phrase_query` is set to `true`. +[float] +==== Minimum should match + +The `query_string` query splits the query text around each operator to create a boolean +query for the entire input. You can use `minimum_should_match` to control how +many "should" clauses in the resulting query should match. 
+ +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "query_string": { + "fields": [ + "title" + ], + "query": "this that thus", + "minimum_should_match": 2 + } + } +} +-------------------------------------------------- +// CONSOLE + +The example above creates a boolean query: + +`(title:this title:that title:thus)~2` + +that matches documents with at least two of the terms `this`, `that` or `thus` +in the single field `title`. + +[float] +===== Multi Field + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "query_string": { + "fields": [ + "title", + "content" + ], + "query": "this that thus", + "minimum_should_match": 2 + } + } +} +-------------------------------------------------- +// CONSOLE + +The example above creates a boolean query: + +`((content:this content:that content:thus) | (title:this title:that title:thus))` + +that matches documents with the disjunction max over the fields `title` and +`content`. Here the `minimum_should_match` parameter can't be applied: whitespace is not an operator, so the whole input `this that thus` is handled as a single clause for each field. + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "query_string": { + "fields": [ + "title", + "content" + ], + "query": "this OR that OR thus", + "minimum_should_match": 2 + } + } +} +-------------------------------------------------- +// CONSOLE + +Adding explicit operators forces each term to be considered as a separate clause. + +The example above creates a boolean query: + +`((content:this | title:this) (content:that | title:that) (content:thus | title:thus))~2` + +that matches documents with at least two of the three "should" clauses, each of +them made of the disjunction max over the fields for each term. 
+ +[float] +===== Cross Field + +[source,js] +-------------------------------------------------- +GET /_search +{ + "query": { + "query_string": { + "fields": [ + "title", + "content" + ], + "query": "this OR that OR thus", + "type": "cross_fields", + "minimum_should_match": 2 + } + } +} +-------------------------------------------------- +// CONSOLE + +The `cross_fields` value in the `type` parameter indicates that fields that have the +same analyzer should be grouped together when the input is analyzed. + +The example above creates a boolean query: + +`(blended(terms:[content:this, title:this]) blended(terms:[content:that, title:that]) blended(terms:[content:thus, title:thus]))~2` + +that matches documents with at least two of the three per-term blended queries. + include::query-string-syntax.asciidoc[]