diff --git a/docs/build.gradle b/docs/build.gradle index 533cc5db8d2bd..cefb03d4b40c7 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -141,6 +141,9 @@ Closure setupMyIndex = { String name, int count -> type: keyword message: type: text + fields: + keyword: + type: keyword user: properties: id: diff --git a/docs/reference/aggregations.asciidoc b/docs/reference/aggregations.asciidoc index 472c028b0c87a..26b6a53da2bba 100644 --- a/docs/reference/aggregations.asciidoc +++ b/docs/reference/aggregations.asciidoc @@ -330,79 +330,66 @@ the aggregated field. [[use-scripts-in-an-agg]] === Use scripts in an aggregation -Some aggregations support <>. You can -use a `script` to extract or generate values for the aggregation: +When a field doesn't exactly match the aggregation you need, you +should aggregate on a <>: [source,console] ---- -GET /my-index-000001/_search +GET /my-index-000001/_search?size=0 { + "runtime_mappings": { + "message.length": { + "type": "long", + "script": "emit(doc['message.keyword'].value.length())" + } + }, "aggs": { - "my-agg-name": { + "message_length": { "histogram": { - "interval": 1000, - "script": { - "source": "doc['my-field'].value.length()" - } + "interval": 10, + "field": "message.length" } } } } ---- // TEST[setup:my_index] -// TEST[s/my-field/http.request.method/] -If you also specify a `field`, the `script` modifies the field values used in -the aggregation. The following aggregation uses a script to modify `my-field` -values: - -[source,console] +//// +[source,console-result] ---- -GET /my-index-000001/_search { - "aggs": { - "my-agg-name": { - "histogram": { - "field": "my-field", - "interval": 1000, - "script": "_value / 1000" - } + "timed_out": false, + "took": "$body.took", + "_shards": { + "total": 1, + "successful": 1, + "failed": 0, + "skipped": 0 + }, + "hits": "$body.hits", + "aggregations": { + "message_length": { + "buckets": [ + { + "key": 30.0, + "doc_count": 5 + } + ] } } } ---- -// TEST[setup:my_index] -// TEST[s/my-field/http.response.bytes/] - -Some aggregations only work on specific data types. Use the `value_type` -parameter to specify a data type for a script-generated value or an unmapped -field. `value_type` accepts the following values: +//// -* `boolean` -* `date` -* `double`, used for all floating-point numbers -* `long`, used for all integers -* `ip` -* `string` +Scripts calculate field values dynamically, which adds a little +overhead to the aggregation. In addition to the time spent calculating, +some aggregations like <> +and <> can't use +some of their optimizations with runtime fields. In total, performance costs +for using a runtime field varies from aggregation to aggregation. -[source,console] ----- -GET /my-index-000001/_search -{ - "aggs": { - "my-agg-name": { - "histogram": { - "field": "my-field", - "interval": 1000, - "script": "_value / 1000", - "value_type": "long" - } - } - } -} ----- -// TEST[setup:my_index] -// TEST[s/my-field/http.response.bytes/] +// TODO when we have calculated fields we can link to them here. [discrete] [[agg-caches]] diff --git a/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc index 83609acba68e5..c9d75a4ea17a2 100644 --- a/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc @@ -251,12 +251,6 @@ instead of the usual 24 hours for other buckets. The same is true for shorter in like e.g. 
12h. Here, we will have only a 11h bucket on the morning of 27 March when the DST shift happens. -==== Scripts - -Like with the normal <>, both document level -scripts and value level scripts are supported. This aggregation does not however, support the `min_doc_count`, -`extended_bounds`, `hard_bounds` and `order` parameters. - ==== Minimum Interval parameter The `minimum_interval` allows the caller to specify the minimum rounding interval that should be used. diff --git a/docs/reference/aggregations/bucket/composite-aggregation.asciidoc b/docs/reference/aggregations/bucket/composite-aggregation.asciidoc index 1f519f0a34699..60e26096a7cf0 100644 --- a/docs/reference/aggregations/bucket/composite-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/composite-aggregation.asciidoc @@ -18,7 +18,7 @@ a composite bucket. ////////////////////////// -[source,js] +[source,console] -------------------------------------------------- PUT /sales { @@ -72,7 +72,6 @@ POST /sales/_bulk?refresh {"index":{"_id":4}} {"product": "apocalypse now", "price": "10", "timestamp": "2017-05-11T08:35"} ------------------------------------------------- -// NOTCONSOLE // TESTSETUP ////////////////////////// @@ -121,7 +120,7 @@ The `sources` parameter can be any of the following types: ===== Terms The `terms` value source is equivalent to a simple `terms` aggregation. -The values are extracted from a field or a script exactly like the `terms` aggregation. +The values are extracted from a field exactly like the `terms` aggregation. Example: @@ -142,25 +141,30 @@ GET /_search } -------------------------------------------------- -Like the `terms` aggregation it is also possible to use a script to create the values for the composite buckets: +Like the `terms` aggregation, it's possible to use a +<> to create values for the composite buckets: -[source,console] --------------------------------------------------- +[source,console,id=composite-aggregation-terms-runtime-field-example] +---- GET /_search { + "runtime_mappings": { + "day_of_week": { + "type": "keyword", + "script": """ + emit(doc['timestamp'].value.dayOfWeekEnum + .getDisplayName(TextStyle.FULL, Locale.ROOT)) + """ + } + }, "size": 0, "aggs": { "my_buckets": { "composite": { "sources": [ { - "product": { - "terms": { - "script": { - "source": "doc['product'].value", - "lang": "painless" - } - } + "dow": { + "terms": { "field": "day_of_week" } } } ] @@ -168,7 +172,35 @@ GET /_search } } } --------------------------------------------------- +---- + +//// +[source,console-result] +---- +{ + "timed_out": false, + "took": "$body.took", + "_shards": { + "total": 1, + "successful": 1, + "failed": 0, + "skipped": 0 + }, + "hits": "$body.hits", + "aggregations": { + "my_buckets": { + "after_key": { "dow": "Wednesday" }, + "buckets": [ + { "key": { "dow": "Monday" }, "doc_count": 1 }, + { "key": { "dow": "Thursday" }, "doc_count": 1 }, + { "key": { "dow": "Tuesday" }, "doc_count": 2 }, + { "key": { "dow": "Wednesday" }, "doc_count": 1 } + ] + } + } +} +---- +//// [[_histogram]] ===== Histogram @@ -197,25 +229,35 @@ GET /_search } -------------------------------------------------- -The values are built from a numeric field or a script that return numerical values: +Like the `histogram` aggregation it's possible to use a +<> to create values for the composite buckets: -[source,console] --------------------------------------------------- +[source,console,id=composite-aggregation-histogram-runtime-field-example] +---- GET /_search { + "runtime_mappings": { + 
"price.discounted": { + "type": "double", + "script": """ + double price = doc['price'].value; + if (doc['product'].value == 'mad max') { + price *= 0.8; + } + emit(price); + """ + } + }, "size": 0, "aggs": { "my_buckets": { "composite": { "sources": [ { - "histo": { + "price": { "histogram": { "interval": 5, - "script": { - "source": "doc['price'].value", - "lang": "painless" - } + "field": "price.discounted" } } } @@ -224,7 +266,34 @@ GET /_search } } } --------------------------------------------------- +---- + +//// +[source,console-result] +---- +{ + "timed_out": false, + "took": "$body.took", + "_shards": { + "total": 1, + "successful": 1, + "failed": 0, + "skipped": 0 + }, + "hits": "$body.hits", + "aggregations": { + "my_buckets": { + "after_key": { "price": 20.0 }, + "buckets": [ + { "key": { "price": 10.0 }, "doc_count": 2 }, + { "key": { "price": 15.0 }, "doc_count": 1 }, + { "key": { "price": 20.0 }, "doc_count": 2 } + ] + } + } +} +---- +//// [[_date_histogram]] ===== Date histogram diff --git a/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc index 4693c1f5b0383..8d285529a603a 100644 --- a/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc @@ -621,9 +621,75 @@ Response: [[date-histogram-scripts]] ==== Scripts -As with the normal <>, -both document-level scripts and -value-level scripts are supported. You can control the order of the returned +If the data in your documents doesn't exactly match what you'd like to aggregate, +use a <> . For example, if the revenue +for promoted sales should be recognized a day after the sale date: + +[source,console,id=datehistogram-aggregation-runtime-field] +---- +POST /sales/_search?size=0 +{ + "runtime_mappings": { + "date.promoted_is_tomorrow": { + "type": "date", + "script": """ + long date = doc['date'].value.toInstant().toEpochMilli(); + if (doc['promoted'].value) { + date += 86400; + } + emit(date); + """ + } + }, + "aggs": { + "sales_over_time": { + "date_histogram": { + "field": "date.promoted_is_tomorrow", + "calendar_interval": "1M" + } + } + } +} +---- +// TEST[setup:sales] + +//// + +[source,console-result] +---- +{ + ... + "aggregations": { + "sales_over_time": { + "buckets": [ + { + "key_as_string": "2015-01-01T00:00:00.000Z", + "key": 1420070400000, + "doc_count": 3 + }, + { + "key_as_string": "2015-02-01T00:00:00.000Z", + "key": 1422748800000, + "doc_count": 2 + }, + { + "key_as_string": "2015-03-01T00:00:00.000Z", + "key": 1425168000000, + "doc_count": 2 + } + ] + } + } +} +---- +// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] + +//// + +[[date-histogram-params]] +==== Parameters + +You can control the order of the returned buckets using the `order` settings and filter the returned buckets based on a `min_doc_count` setting (by default all buckets between the first @@ -673,51 +739,52 @@ the `order` setting. 
This setting supports the same `order` functionality as [[date-histogram-aggregate-scripts]] ===== Using a script to aggregate by day of the week -When you need to aggregate the results by day of the week, use a script that -returns the day of the week: +When you need to aggregate the results by day of the week, run a `terms` +aggregation on a <> that returns the day of the week: -[source,console,id=datehistogram-aggregation-script-example] --------------------------------------------------- +[source,console,id=datehistogram-aggregation-day-of-week-runtime-field] +---- POST /sales/_search?size=0 { + "runtime_mappings": { + "date.day_of_week": { + "type": "keyword", + "script": "emit(doc['date'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))" + } + }, "aggs": { - "dayOfWeek": { - "terms": { - "script": { - "lang": "painless", - "source": "doc['date'].value.dayOfWeekEnum.value" - } - } + "day_of_week": { + "terms": { "field": "date.day_of_week" } } } } --------------------------------------------------- +---- // TEST[setup:sales] Response: [source,console-result] --------------------------------------------------- +---- { ... "aggregations": { - "dayOfWeek": { + "day_of_week": { "doc_count_error_upper_bound": 0, "sum_other_doc_count": 0, "buckets": [ { - "key": "7", + "key": "Sunday", "doc_count": 4 }, { - "key": "4", + "key": "Thursday", "doc_count": 3 } ] } } } --------------------------------------------------- +---- // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] The response will contain all the buckets having the relative day of diff --git a/docs/reference/aggregations/bucket/diversified-sampler-aggregation.asciidoc b/docs/reference/aggregations/bucket/diversified-sampler-aggregation.asciidoc index 4b829255db38d..c3c99fae85be0 100644 --- a/docs/reference/aggregations/bucket/diversified-sampler-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/diversified-sampler-aggregation.asciidoc @@ -18,10 +18,10 @@ a large spike in a timeline or an over-active forum spammer). * Removing bias from analytics by ensuring fair representation of content from different sources * Reducing the running cost of aggregations that can produce useful results using only samples e.g. `significant_terms` -A choice of `field` or `script` setting is used to provide values used for de-duplication and the `max_docs_per_value` setting controls the maximum +The `field` setting is used to provide values used for de-duplication and the `max_docs_per_value` setting controls the maximum number of documents collected on any one shard which share a common value. The default setting for `max_docs_per_value` is 1. -The aggregation will throw an error if the choice of `field` or `script` produces multiple values for a single document (de-duplication using multi-valued fields is not supported due to efficiency concerns). +The aggregation will throw an error if the `field` produces multiple values for a single document (de-duplication using multi-valued fields is not supported due to efficiency concerns). Example: @@ -89,13 +89,14 @@ Response: <1> 151 documents were sampled in total. <2> The results of the significant_terms aggregation are not skewed by any single author's quirks because we asked for a maximum of one post from any one author in our sample. -==== Scripted example: +==== Scripted example -In this scenario we might want to diversify on a combination of field values. 
We can use a `script` to produce a hash of the -multiple values in a tags field to ensure we don't have a sample that consists of the same repeated combinations of tags. +In this scenario we might want to diversify on a combination of field values. We can use a <> to +produce a hash of the multiple values in a tags field to ensure we don't have a sample that consists of the same +repeated combinations of tags. -[source,console] --------------------------------------------------- +[source,console,id=diversified-sampler-aggregation-runtime-field-example] +---- POST /stackoverflow/_search?size=0 { "query": { @@ -103,15 +104,18 @@ POST /stackoverflow/_search?size=0 "query": "tags:kibana" } }, + "runtime_mappings": { + "tags.hash": { + "type": "long", + "script": "emit(doc['tags'].hashCode())" + } + }, "aggs": { "my_unbiased_sample": { "diversified_sampler": { "shard_size": 200, "max_docs_per_value": 3, - "script": { - "lang": "painless", - "source": "doc['tags'].hashCode()" - } + "field": "tags.hash" }, "aggs": { "keywords": { @@ -124,13 +128,13 @@ POST /stackoverflow/_search?size=0 } } } --------------------------------------------------- +---- // TEST[setup:stackoverflow] Response: [source,console-result] --------------------------------------------------- +---- { ... "aggregations": { @@ -157,7 +161,7 @@ Response: } } } --------------------------------------------------- +---- // TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/] // TESTRESPONSE[s/2.213/$body.aggregations.my_unbiased_sample.keywords.buckets.0.score/] // TESTRESPONSE[s/1.34/$body.aggregations.my_unbiased_sample.keywords.buckets.1.score/] diff --git a/docs/reference/aggregations/bucket/multi-terms-aggregation.asciidoc b/docs/reference/aggregations/bucket/multi-terms-aggregation.asciidoc index 0f545f5aacfd2..0997d8f50240e 100644 --- a/docs/reference/aggregations/bucket/multi-terms-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/multi-terms-aggregation.asciidoc @@ -169,19 +169,22 @@ collect_mode:: Optional. 
Specifies the strategy for data collecti Generating the terms using a script: -[source,console,id=multi-terms-aggregation-script-example] --------------------------------------------------- +[source,console,id=multi-terms-aggregation-runtime-field-example] +---- GET /products/_search { + "runtime_mappings": { + "genre.length": { + "type": "long", + "script": "emit(doc['genre'].value.length())" + } + }, "aggs": { "genres_and_products": { "multi_terms": { "terms": [ { - "script": { - "source": "doc['genre'].value.length()", - "lang": "painless" - } + "field": "genre.length" }, { "field": "product" @@ -191,7 +194,7 @@ GET /products/_search } } } --------------------------------------------------- +---- // TEST[s/_search/_search\?filter_path=aggregations/] Response: @@ -207,7 +210,7 @@ Response: "buckets" : [ { "key" : [ - "4", + 4, "Product A" ], "key_as_string" : "4|Product A", @@ -215,7 +218,7 @@ Response: }, { "key" : [ - "4", + 4, "Product B" ], "key_as_string" : "4|Product B", @@ -223,7 +226,7 @@ Response: }, { "key" : [ - "10", + 10, "Product B" ], "key_as_string" : "10|Product B", diff --git a/docs/reference/aggregations/bucket/range-aggregation.asciidoc b/docs/reference/aggregations/bucket/range-aggregation.asciidoc index 3dc58721eaea8..7e7055b7466e6 100644 --- a/docs/reference/aggregations/bucket/range-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/range-aggregation.asciidoc @@ -172,22 +172,31 @@ Response: ==== Script -Range aggregation accepts a `script` parameter. This parameter allows to defined an inline `script` that -will be executed during aggregation execution. +If the data in your documents doesn't exactly match what you'd like to aggregate, +use a <>. For example, if you need to +apply a particular currency conversion rate: -The following example shows how to use an `inline` script with the `painless` script language and no script parameters: - -[source,console] --------------------------------------------------- +[source,console,id=range-aggregation-runtime-field-example] +---- GET /_search { + "runtime_mappings": { + "price.euros": { + "type": "double", + "script": { + "source": """ + emit(doc['price'].value * params.conversion_rate) + """, + "params": { + "conversion_rate": 0.835526591 + } + } + } + }, "aggs": { "price_ranges": { "range": { - "script": { - "lang": "painless", - "source": "doc['price'].value" - }, + "field": "price.euros", "ranges": [ { "to": 100 }, { "from": 100, "to": 200 }, @@ -197,109 +206,42 @@ GET /_search } } } --------------------------------------------------- - -It is also possible to use stored scripts. 
Here is a simple stored script: - -[source,console] --------------------------------------------------- -POST /_scripts/convert_currency -{ - "script": { - "lang": "painless", - "source": "doc[params.field].value * params.conversion_rate" - } -} --------------------------------------------------- +---- // TEST[setup:sales] - -And this new stored script can be used in the range aggregation like this: - -[source,console] --------------------------------------------------- -GET /_search -{ - "aggs": { - "price_ranges": { - "range": { - "script": { - "id": "convert_currency", <1> - "params": { <2> - "field": "price", - "conversion_rate": 0.835526591 - } - }, - "ranges": [ - { "from": 0, "to": 100 }, - { "from": 100 } - ] - } - } - } -} --------------------------------------------------- // TEST[s/GET \/_search/GET \/_search\?filter_path=aggregations/] -// TEST[continued] -<1> Id of the stored script -<2> Parameters to use when executing the stored script ////////////////////////// [source,console-result] --------------------------------------------------- +---- { "aggregations": { "price_ranges": { "buckets": [ { - "key": "0.0-100.0", - "from": 0.0, + "key": "*-100.0", "to": 100.0, "doc_count": 2 }, { - "key": "100.0-*", + "key": "100.0-200.0", "from": 100.0, + "to": 200.0, "doc_count": 5 + }, + { + "key": "200.0-*", + "from": 200.0, + "doc_count": 0 } ] } } } --------------------------------------------------- +---- ////////////////////////// -==== Value Script - -Lets say the product prices are in USD but we would like to get the price ranges in EURO. We can use value script to convert the prices prior the aggregation (assuming conversion rate of 0.8) - -[source,console] --------------------------------------------------- -GET /sales/_search -{ - "aggs": { - "price_ranges": { - "range": { - "field": "price", - "script": { - "source": "_value * params.conversion_rate", - "params": { - "conversion_rate": 0.8 - } - }, - "ranges": [ - { "to": 35 }, - { "from": 35, "to": 70 }, - { "from": 70 } - ] - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - ==== Sub Aggregations The following example, not only "bucket" the documents to the different buckets but also computes statistics over the prices in each price range
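
For comparison with the removed *Value Script* example above, which converted prices from USD to euros at a rate of 0.8 before bucketing, the same conversion can be written with the runtime-field pattern these changes adopt. The request below is an illustrative sketch only: it assumes the `sales` index and numeric `price` field used throughout these docs, and the `price.euros_example` runtime field name is invented for the sketch.

[source,console]
----
GET /sales/_search?size=0
{
  // illustrative sketch: "price.euros_example" is a made-up runtime field name,
  // and 0.8 is the conversion rate from the removed value-script example
  "runtime_mappings": {
    "price.euros_example": {
      "type": "double",
      "script": {
        "source": "emit(doc['price'].value * params.conversion_rate)",
        "params": {
          "conversion_rate": 0.8
        }
      }
    }
  },
  "aggs": {
    "price_ranges": {
      "range": {
        "field": "price.euros_example",
        "ranges": [
          { "to": 35 },
          { "from": 35, "to": 70 },
          { "from": 70 }
        ]
      }
    }
  }
}
----

Keeping the conversion in a runtime field leaves the `range` aggregation itself unchanged, and the same field could be reused by other aggregations or queries in the same request.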