diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/LicenseAnalyzer.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/LicenseAnalyzer.java index c52ea9aaeb6f5..533066168c604 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/LicenseAnalyzer.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/LicenseAnalyzer.java @@ -102,7 +102,7 @@ public class LicenseAnalyzer { AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE\\. + SOFTWARE\\.? """).replaceAll("\\s+", "\\\\s*"), Pattern.DOTALL)), new LicenseMatcher( "MIT-0", diff --git a/docs/changelog/109017.yaml b/docs/changelog/109017.yaml new file mode 100644 index 0000000000000..80bcdd6fc0e25 --- /dev/null +++ b/docs/changelog/109017.yaml @@ -0,0 +1,6 @@ +pr: 109017 +summary: "ESQL: Add `MV_PSERIES_WEIGHTED_SUM` for score calculations used by security\ + \ solution" +area: ES|QL +type: "feature" +issues: [ ] diff --git a/docs/changelog/110548.yaml b/docs/changelog/110548.yaml new file mode 100644 index 0000000000000..d1c0952920889 --- /dev/null +++ b/docs/changelog/110548.yaml @@ -0,0 +1,6 @@ +pr: 110548 +summary: "[ES|QL] Add `CombineBinaryComparisons` rule" +area: ES|QL +type: enhancement +issues: + - 108525 diff --git a/docs/changelog/110630.yaml b/docs/changelog/110630.yaml new file mode 100644 index 0000000000000..9bf78e1209753 --- /dev/null +++ b/docs/changelog/110630.yaml @@ -0,0 +1,5 @@ +pr: 110630 +summary: Telemetry for inference adaptive allocations +area: Machine Learning +type: feature +issues: [] diff --git a/docs/changelog/110796.yaml b/docs/changelog/110796.yaml new file mode 100644 index 0000000000000..a54a9a08bbd27 --- /dev/null +++ b/docs/changelog/110796.yaml @@ -0,0 +1,5 @@ +pr: 110796 +summary: Remove needless forking to GENERIC in `TransportMultiSearchAction` +area: Search +type: bug +issues: [] diff --git a/docs/changelog/111212.yaml b/docs/changelog/111212.yaml new file mode 100644 index 0000000000000..67d1513b3ff6f --- /dev/null +++ b/docs/changelog/111212.yaml @@ -0,0 +1,6 @@ +pr: 111212 +summary: Fix score count validation in reranker response +area: Ranking +type: bug +issues: + - 111202 diff --git a/docs/changelog/111284.yaml b/docs/changelog/111284.yaml new file mode 100644 index 0000000000000..f87649a134af6 --- /dev/null +++ b/docs/changelog/111284.yaml @@ -0,0 +1,6 @@ +pr: 111284 +summary: Update `semantic_text` field to support indexing numeric and boolean data + types +area: Mapping +type: enhancement +issues: [] diff --git a/docs/changelog/111290.yaml b/docs/changelog/111290.yaml new file mode 100644 index 0000000000000..efcb01a4aedf9 --- /dev/null +++ b/docs/changelog/111290.yaml @@ -0,0 +1,5 @@ +pr: 111290 +summary: Fix enrich policy runner exception handling on empty segments response +area: Ingest Node +type: bug +issues: [] diff --git a/docs/changelog/111311.yaml b/docs/changelog/111311.yaml new file mode 100644 index 0000000000000..5786e11e885e2 --- /dev/null +++ b/docs/changelog/111311.yaml @@ -0,0 +1,6 @@ +pr: 111311 +summary: Adding support for data streams with a match-all template +area: Data streams +type: bug +issues: + - 111204 diff --git a/docs/changelog/111316.yaml b/docs/changelog/111316.yaml new file mode 100644 index 0000000000000..0d915cd1ec3ea --- /dev/null +++ b/docs/changelog/111316.yaml @@ -0,0 +1,5 @@ +pr: 111316 +summary: "[Service Account] Add `AutoOps` account" +area: Security +type: enhancement +issues: [] diff --git a/docs/changelog/111344.yaml b/docs/changelog/111344.yaml new file mode 100644 index 0000000000000..3d5988054749d --- /dev/null +++ b/docs/changelog/111344.yaml @@ -0,0 +1,5 @@ +pr: 111344 +summary: Add support for Azure Managed Identity +area: Snapshot/Restore +type: enhancement +issues: [] diff --git a/docs/changelog/111363.yaml b/docs/changelog/111363.yaml new file mode 100644 index 0000000000000..2cb3c5342ea5c --- /dev/null +++ b/docs/changelog/111363.yaml @@ -0,0 +1,6 @@ +pr: 111363 +summary: Ensure vector similarity correctly limits `inner_hits` returned for nested + kNN +area: Vector Search +type: bug +issues: [111093] diff --git a/docs/changelog/111366.yaml b/docs/changelog/111366.yaml new file mode 100644 index 0000000000000..9cb127077094f --- /dev/null +++ b/docs/changelog/111366.yaml @@ -0,0 +1,6 @@ +pr: 111366 +summary: "[Inference API] Replace `model_id` with `inference_id` in inference API\ + \ except when stored" +area: Machine Learning +type: bug +issues: [] diff --git a/docs/changelog/111367.yaml b/docs/changelog/111367.yaml new file mode 100644 index 0000000000000..89e6c1d3b4da4 --- /dev/null +++ b/docs/changelog/111367.yaml @@ -0,0 +1,5 @@ +pr: 111367 +summary: "ESQL: Add Values aggregation tests, fix `ConstantBytesRefBlock` memory handling" +area: ES|QL +type: bug +issues: [] diff --git a/docs/changelog/111445.yaml b/docs/changelog/111445.yaml new file mode 100644 index 0000000000000..9ba8e4371bd0c --- /dev/null +++ b/docs/changelog/111445.yaml @@ -0,0 +1,5 @@ +pr: 111445 +summary: Support booleans in routing path +area: TSDB +type: enhancement +issues: [] diff --git a/docs/reference/esql/functions/aggregation-functions.asciidoc b/docs/reference/esql/functions/aggregation-functions.asciidoc index 821b109741a0a..7cdc42ea6cbf9 100644 --- a/docs/reference/esql/functions/aggregation-functions.asciidoc +++ b/docs/reference/esql/functions/aggregation-functions.asciidoc @@ -9,8 +9,8 @@ The <> command supports these aggregate functions: // tag::agg_list[] * <> -* <> -* <> +* <> +* <> * <> * <> * <> @@ -19,13 +19,13 @@ The <> command supports these aggregate functions: * experimental:[] <> * <> * <> -* <> -* experimental:[] <> +* <> +* experimental:[] <> // end::agg_list[] -include::count.asciidoc[] -include::count-distinct.asciidoc[] include::layout/avg.asciidoc[] +include::layout/count.asciidoc[] +include::layout/count_distinct.asciidoc[] include::layout/max.asciidoc[] include::layout/median.asciidoc[] include::layout/median_absolute_deviation.asciidoc[] @@ -34,5 +34,5 @@ include::layout/percentile.asciidoc[] include::layout/st_centroid_agg.asciidoc[] include::layout/sum.asciidoc[] include::layout/top.asciidoc[] -include::values.asciidoc[] -include::weighted-avg.asciidoc[] +include::layout/values.asciidoc[] +include::layout/weighted_avg.asciidoc[] diff --git a/docs/reference/esql/functions/appendix/count_distinct.asciidoc b/docs/reference/esql/functions/appendix/count_distinct.asciidoc new file mode 100644 index 0000000000000..065065cf34e06 --- /dev/null +++ b/docs/reference/esql/functions/appendix/count_distinct.asciidoc @@ -0,0 +1,25 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-agg-count-distinct-approximate]] +==== Counts are approximate + +Computing exact counts requires loading values into a set and returning its +size. This doesn't scale when working on high-cardinality sets and/or large +values as the required memory usage and the need to communicate those +per-shard sets between nodes would utilize too many resources of the cluster. + +This `COUNT_DISTINCT` function is based on the +https://static.googleusercontent.com/media/research.google.com/fr//pubs/archive/40671.pdf[HyperLogLog++] +algorithm, which counts based on the hashes of the values with some interesting +properties: + +include::../../../aggregations/metrics/cardinality-aggregation.asciidoc[tag=explanation] + +The `COUNT_DISTINCT` function takes an optional second parameter to configure +the precision threshold. The precision_threshold options allows to trade memory +for accuracy, and defines a unique count below which counts are expected to be +close to accurate. Above this value, counts might become a bit more fuzzy. The +maximum supported value is 40000, thresholds above this number will have the +same effect as a threshold of 40000. The default value is `3000`. + diff --git a/docs/reference/esql/functions/appendix/values.asciidoc b/docs/reference/esql/functions/appendix/values.asciidoc new file mode 100644 index 0000000000000..ec3cfff2db6a6 --- /dev/null +++ b/docs/reference/esql/functions/appendix/values.asciidoc @@ -0,0 +1,10 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[WARNING] +==== +This can use a significant amount of memory and ES|QL doesn't yet +grow aggregations beyond memory. So this aggregation will work until +it is used to collect more values than can fit into memory. Once it +collects too many values it will fail the query with +a <>. +==== diff --git a/docs/reference/esql/functions/count-distinct.asciidoc b/docs/reference/esql/functions/count-distinct.asciidoc deleted file mode 100644 index a9f30d24e0e83..0000000000000 --- a/docs/reference/esql/functions/count-distinct.asciidoc +++ /dev/null @@ -1,85 +0,0 @@ -[discrete] -[[esql-agg-count-distinct]] -=== `COUNT_DISTINCT` - -*Syntax* - -[source,esql] ----- -COUNT_DISTINCT(expression[, precision_threshold]) ----- - -*Parameters* - -`expression`:: -Expression that outputs the values on which to perform a distinct count. - -`precision_threshold`:: -Precision threshold. Refer to <>. The -maximum supported value is 40000. Thresholds above this number will have the -same effect as a threshold of 40000. The default value is 3000. - -*Description* - -Returns the approximate number of distinct values. - -*Supported types* - -Can take any field type as input. - -*Examples* - -[source.merge.styled,esql] ----- -include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct-result] -|=== - -With the optional second parameter to configure the precision threshold: - -[source.merge.styled,esql] ----- -include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct-precision] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct-precision-result] -|=== - -The expression can use inline functions. This example splits a string into -multiple values using the `SPLIT` function and counts the unique values: - -[source.merge.styled,esql] ----- -include::{esql-specs}/stats_count_distinct.csv-spec[tag=docsCountDistinctWithExpression] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/stats_count_distinct.csv-spec[tag=docsCountDistinctWithExpression-result] -|=== - -[discrete] -[[esql-agg-count-distinct-approximate]] -==== Counts are approximate - -Computing exact counts requires loading values into a set and returning its -size. This doesn't scale when working on high-cardinality sets and/or large -values as the required memory usage and the need to communicate those -per-shard sets between nodes would utilize too many resources of the cluster. - -This `COUNT_DISTINCT` function is based on the -https://static.googleusercontent.com/media/research.google.com/fr//pubs/archive/40671.pdf[HyperLogLog++] -algorithm, which counts based on the hashes of the values with some interesting -properties: - -include::../../aggregations/metrics/cardinality-aggregation.asciidoc[tag=explanation] - -The `COUNT_DISTINCT` function takes an optional second parameter to configure -the precision threshold. The precision_threshold options allows to trade memory -for accuracy, and defines a unique count below which counts are expected to be -close to accurate. Above this value, counts might become a bit more fuzzy. The -maximum supported value is 40000, thresholds above this number will have the -same effect as a threshold of 40000. The default value is `3000`. \ No newline at end of file diff --git a/docs/reference/esql/functions/description/count.asciidoc b/docs/reference/esql/functions/description/count.asciidoc new file mode 100644 index 0000000000000..ee806d65a8ea3 --- /dev/null +++ b/docs/reference/esql/functions/description/count.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Returns the total number (count) of input values. diff --git a/docs/reference/esql/functions/description/count_distinct.asciidoc b/docs/reference/esql/functions/description/count_distinct.asciidoc new file mode 100644 index 0000000000000..d10825bb991f5 --- /dev/null +++ b/docs/reference/esql/functions/description/count_distinct.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Returns the approximate number of distinct values. diff --git a/docs/reference/esql/functions/description/mv_pseries_weighted_sum.asciidoc b/docs/reference/esql/functions/description/mv_pseries_weighted_sum.asciidoc new file mode 100644 index 0000000000000..d464689f40a01 --- /dev/null +++ b/docs/reference/esql/functions/description/mv_pseries_weighted_sum.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Converts a multivalued expression into a single-valued column by multiplying every element on the input list by its corresponding term in P-Series and computing the sum. diff --git a/docs/reference/esql/functions/description/values.asciidoc b/docs/reference/esql/functions/description/values.asciidoc new file mode 100644 index 0000000000000..b3cebcce955f0 --- /dev/null +++ b/docs/reference/esql/functions/description/values.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Returns all values in a group as a multivalued field. The order of the returned values isn't guaranteed. If you need the values returned in order use <>. diff --git a/docs/reference/esql/functions/description/weighted_avg.asciidoc b/docs/reference/esql/functions/description/weighted_avg.asciidoc new file mode 100644 index 0000000000000..a15d5d4ea171d --- /dev/null +++ b/docs/reference/esql/functions/description/weighted_avg.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +The weighted average of a numeric expression. diff --git a/docs/reference/esql/functions/count.asciidoc b/docs/reference/esql/functions/examples/count.asciidoc similarity index 63% rename from docs/reference/esql/functions/count.asciidoc rename to docs/reference/esql/functions/examples/count.asciidoc index 66cfe76350cdd..fb696b51e054c 100644 --- a/docs/reference/esql/functions/count.asciidoc +++ b/docs/reference/esql/functions/examples/count.asciidoc @@ -1,27 +1,4 @@ -[discrete] -[[esql-agg-count]] -=== `COUNT` - -*Syntax* - -[source,esql] ----- -COUNT([expression]) ----- - -*Parameters* - -`expression`:: -Expression that outputs values to be counted. -If omitted, equivalent to `COUNT(*)` (the number of rows). - -*Description* - -Returns the total number (count) of input values. - -*Supported types* - -Can take any field type as input. +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. *Examples* @@ -33,9 +10,7 @@ include::{esql-specs}/stats.csv-spec[tag=count] |=== include::{esql-specs}/stats.csv-spec[tag=count-result] |=== - -To count the number of rows, use `COUNT()` or `COUNT(*)`: - +To count the number of rows, use `COUNT()` or `COUNT(*)` [source.merge.styled,esql] ---- include::{esql-specs}/docs.csv-spec[tag=countAll] @@ -44,10 +19,7 @@ include::{esql-specs}/docs.csv-spec[tag=countAll] |=== include::{esql-specs}/docs.csv-spec[tag=countAll-result] |=== - -The expression can use inline functions. This example splits a string into -multiple values using the `SPLIT` function and counts the values: - +The expression can use inline functions. This example splits a string into multiple values using the `SPLIT` function and counts the values [source.merge.styled,esql] ---- include::{esql-specs}/stats.csv-spec[tag=docsCountWithExpression] @@ -56,11 +28,7 @@ include::{esql-specs}/stats.csv-spec[tag=docsCountWithExpression] |=== include::{esql-specs}/stats.csv-spec[tag=docsCountWithExpression-result] |=== - -[[esql-agg-count-or-null]] -To count the number of times an expression returns `TRUE` use -a <> command to remove rows that shouldn't be included: - +To count the number of times an expression returns `TRUE` use a <> command to remove rows that shouldn't be included [source.merge.styled,esql] ---- include::{esql-specs}/stats.csv-spec[tag=count-where] @@ -69,10 +37,7 @@ include::{esql-specs}/stats.csv-spec[tag=count-where] |=== include::{esql-specs}/stats.csv-spec[tag=count-where-result] |=== - -To count the same stream of data based on two different expressions -use the pattern `COUNT( OR NULL)`: - +To count the same stream of data based on two different expressions use the pattern `COUNT( OR NULL)` [source.merge.styled,esql] ---- include::{esql-specs}/stats.csv-spec[tag=count-or-null] @@ -81,3 +46,4 @@ include::{esql-specs}/stats.csv-spec[tag=count-or-null] |=== include::{esql-specs}/stats.csv-spec[tag=count-or-null-result] |=== + diff --git a/docs/reference/esql/functions/examples/count_distinct.asciidoc b/docs/reference/esql/functions/examples/count_distinct.asciidoc new file mode 100644 index 0000000000000..44968c0652ec0 --- /dev/null +++ b/docs/reference/esql/functions/examples/count_distinct.asciidoc @@ -0,0 +1,31 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Examples* + +[source.merge.styled,esql] +---- +include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct-result] +|=== +With the optional second parameter to configure the precision threshold +[source.merge.styled,esql] +---- +include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct-precision] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/stats_count_distinct.csv-spec[tag=count-distinct-precision-result] +|=== +The expression can use inline functions. This example splits a string into multiple values using the `SPLIT` function and counts the unique values +[source.merge.styled,esql] +---- +include::{esql-specs}/stats_count_distinct.csv-spec[tag=docsCountDistinctWithExpression] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/stats_count_distinct.csv-spec[tag=docsCountDistinctWithExpression-result] +|=== + diff --git a/docs/reference/esql/functions/examples/mv_pseries_weighted_sum.asciidoc b/docs/reference/esql/functions/examples/mv_pseries_weighted_sum.asciidoc new file mode 100644 index 0000000000000..bce4deb1f5225 --- /dev/null +++ b/docs/reference/esql/functions/examples/mv_pseries_weighted_sum.asciidoc @@ -0,0 +1,13 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Example* + +[source.merge.styled,esql] +---- +include::{esql-specs}/mv_pseries_weighted_sum.csv-spec[tag=example] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/mv_pseries_weighted_sum.csv-spec[tag=example-result] +|=== + diff --git a/docs/reference/esql/functions/examples/values.asciidoc b/docs/reference/esql/functions/examples/values.asciidoc new file mode 100644 index 0000000000000..c013fc39d92ca --- /dev/null +++ b/docs/reference/esql/functions/examples/values.asciidoc @@ -0,0 +1,13 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Example* + +[source.merge.styled,esql] +---- +include::{esql-specs}/string.csv-spec[tag=values-grouped] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/string.csv-spec[tag=values-grouped-result] +|=== + diff --git a/docs/reference/esql/functions/examples/weighted_avg.asciidoc b/docs/reference/esql/functions/examples/weighted_avg.asciidoc new file mode 100644 index 0000000000000..e8e8cc3eda006 --- /dev/null +++ b/docs/reference/esql/functions/examples/weighted_avg.asciidoc @@ -0,0 +1,13 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Example* + +[source.merge.styled,esql] +---- +include::{esql-specs}/stats.csv-spec[tag=weighted-avg] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/stats.csv-spec[tag=weighted-avg-result] +|=== + diff --git a/docs/reference/esql/functions/kibana/definition/count.json b/docs/reference/esql/functions/kibana/definition/count.json new file mode 100644 index 0000000000000..e05ebc6789816 --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/count.json @@ -0,0 +1,159 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "agg", + "name" : "count", + "description" : "Returns the total number (count) of input values.", + "signatures" : [ + { + "params" : [ + { + "name" : "field", + "type" : "boolean", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "cartesian_point", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "datetime", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "double", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "geo_point", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "integer", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "ip", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "long", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "unsigned_long", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "version", + "optional" : true, + "description" : "Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows)." + } + ], + "variadic" : false, + "returnType" : "long" + } + ], + "examples" : [ + "FROM employees\n| STATS COUNT(height)", + "FROM employees \n| STATS count = COUNT(*) BY languages \n| SORT languages DESC", + "ROW words=\"foo;bar;baz;qux;quux;foo\"\n| STATS word_count = COUNT(SPLIT(words, \";\"))", + "ROW n=1\n| WHERE n < 0\n| STATS COUNT(n)", + "ROW n=1\n| STATS COUNT(n > 0 OR NULL), COUNT(n < 0 OR NULL)" + ] +} diff --git a/docs/reference/esql/functions/kibana/definition/count_distinct.json b/docs/reference/esql/functions/kibana/definition/count_distinct.json new file mode 100644 index 0000000000000..801bd26f7d022 --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/count_distinct.json @@ -0,0 +1,607 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "agg", + "name" : "count_distinct", + "description" : "Returns the approximate number of distinct values.", + "signatures" : [ + { + "params" : [ + { + "name" : "field", + "type" : "boolean", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "boolean", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "boolean", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "boolean", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "datetime", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "datetime", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "datetime", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "datetime", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "double", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "double", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "double", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "double", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "integer", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "integer", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "integer", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "integer", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "ip", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "ip", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "ip", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "ip", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "long", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "long", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "long", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "long", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "version", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "version", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "integer", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "version", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "version", + "optional" : false, + "description" : "Column or literal for which to count the number of distinct values." + }, + { + "name" : "precision", + "type" : "unsigned_long", + "optional" : true, + "description" : "Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000." + } + ], + "variadic" : false, + "returnType" : "long" + } + ], + "examples" : [ + "FROM hosts\n| STATS COUNT_DISTINCT(ip0), COUNT_DISTINCT(ip1)", + "FROM hosts\n| STATS COUNT_DISTINCT(ip0, 80000), COUNT_DISTINCT(ip1, 5)", + "ROW words=\"foo;bar;baz;qux;quux;foo\"\n| STATS distinct_word_count = COUNT_DISTINCT(SPLIT(words, \";\"))" + ] +} diff --git a/docs/reference/esql/functions/kibana/definition/mv_pseries_weighted_sum.json b/docs/reference/esql/functions/kibana/definition/mv_pseries_weighted_sum.json new file mode 100644 index 0000000000000..626f7befbb12e --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/mv_pseries_weighted_sum.json @@ -0,0 +1,29 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "eval", + "name" : "mv_pseries_weighted_sum", + "description" : "Converts a multivalued expression into a single-valued column by multiplying every element on the input list by its corresponding term in P-Series and computing the sum.", + "signatures" : [ + { + "params" : [ + { + "name" : "number", + "type" : "double", + "optional" : false, + "description" : "Multivalue expression." + }, + { + "name" : "p", + "type" : "double", + "optional" : false, + "description" : "It is a constant number that represents the 'p' parameter in the P-Series. It impacts every element's contribution to the weighted sum." + } + ], + "variadic" : false, + "returnType" : "double" + } + ], + "examples" : [ + "ROW a = [70.0, 45.0, 21.0, 21.0, 21.0]\n| EVAL sum = MV_PSERIES_WEIGHTED_SUM(a, 1.5)\n| KEEP sum" + ] +} diff --git a/docs/reference/esql/functions/kibana/definition/values.json b/docs/reference/esql/functions/kibana/definition/values.json new file mode 100644 index 0000000000000..3e0036c4d25b6 --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/values.json @@ -0,0 +1,119 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "agg", + "name" : "values", + "description" : "Returns all values in a group as a multivalued field. The order of the returned values isn't guaranteed. If you need the values returned in order use <>.", + "signatures" : [ + { + "params" : [ + { + "name" : "field", + "type" : "boolean", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "boolean" + }, + { + "params" : [ + { + "name" : "field", + "type" : "datetime", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "datetime" + }, + { + "params" : [ + { + "name" : "field", + "type" : "double", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "field", + "type" : "integer", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "integer" + }, + { + "params" : [ + { + "name" : "field", + "type" : "ip", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "ip" + }, + { + "params" : [ + { + "name" : "field", + "type" : "keyword", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "keyword" + }, + { + "params" : [ + { + "name" : "field", + "type" : "long", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "long" + }, + { + "params" : [ + { + "name" : "field", + "type" : "text", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "text" + }, + { + "params" : [ + { + "name" : "field", + "type" : "version", + "optional" : false, + "description" : "" + } + ], + "variadic" : false, + "returnType" : "version" + } + ], + "examples" : [ + " FROM employees\n| EVAL first_letter = SUBSTRING(first_name, 0, 1)\n| STATS first_name=MV_SORT(VALUES(first_name)) BY first_letter\n| SORT first_letter" + ] +} diff --git a/docs/reference/esql/functions/kibana/definition/weighted_avg.json b/docs/reference/esql/functions/kibana/definition/weighted_avg.json new file mode 100644 index 0000000000000..a04a5bff62bbb --- /dev/null +++ b/docs/reference/esql/functions/kibana/definition/weighted_avg.json @@ -0,0 +1,173 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", + "type" : "agg", + "name" : "weighted_avg", + "description" : "The weighted average of a numeric expression.", + "signatures" : [ + { + "params" : [ + { + "name" : "number", + "type" : "double", + "optional" : false, + "description" : "A numeric value." + }, + { + "name" : "weight", + "type" : "double", + "optional" : false, + "description" : "A numeric weight." + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "number", + "type" : "double", + "optional" : false, + "description" : "A numeric value." + }, + { + "name" : "weight", + "type" : "integer", + "optional" : false, + "description" : "A numeric weight." + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "number", + "type" : "double", + "optional" : false, + "description" : "A numeric value." + }, + { + "name" : "weight", + "type" : "long", + "optional" : false, + "description" : "A numeric weight." + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "number", + "type" : "integer", + "optional" : false, + "description" : "A numeric value." + }, + { + "name" : "weight", + "type" : "double", + "optional" : false, + "description" : "A numeric weight." + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "number", + "type" : "integer", + "optional" : false, + "description" : "A numeric value." + }, + { + "name" : "weight", + "type" : "integer", + "optional" : false, + "description" : "A numeric weight." + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "number", + "type" : "integer", + "optional" : false, + "description" : "A numeric value." + }, + { + "name" : "weight", + "type" : "long", + "optional" : false, + "description" : "A numeric weight." + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "number", + "type" : "long", + "optional" : false, + "description" : "A numeric value." + }, + { + "name" : "weight", + "type" : "double", + "optional" : false, + "description" : "A numeric weight." + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "number", + "type" : "long", + "optional" : false, + "description" : "A numeric value." + }, + { + "name" : "weight", + "type" : "integer", + "optional" : false, + "description" : "A numeric weight." + } + ], + "variadic" : false, + "returnType" : "double" + }, + { + "params" : [ + { + "name" : "number", + "type" : "long", + "optional" : false, + "description" : "A numeric value." + }, + { + "name" : "weight", + "type" : "long", + "optional" : false, + "description" : "A numeric weight." + } + ], + "variadic" : false, + "returnType" : "double" + } + ], + "examples" : [ + "FROM employees\n| STATS w_avg = WEIGHTED_AVG(salary, height) by languages\n| EVAL w_avg = ROUND(w_avg)\n| KEEP w_avg, languages\n| SORT languages" + ] +} diff --git a/docs/reference/esql/functions/kibana/docs/count.md b/docs/reference/esql/functions/kibana/docs/count.md new file mode 100644 index 0000000000000..dc9c356a847ed --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/count.md @@ -0,0 +1,11 @@ + + +### COUNT +Returns the total number (count) of input values. + +``` +FROM employees +| STATS COUNT(height) +``` diff --git a/docs/reference/esql/functions/kibana/docs/count_distinct.md b/docs/reference/esql/functions/kibana/docs/count_distinct.md new file mode 100644 index 0000000000000..a6b451bf9d38d --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/count_distinct.md @@ -0,0 +1,11 @@ + + +### COUNT_DISTINCT +Returns the approximate number of distinct values. + +``` +FROM hosts +| STATS COUNT_DISTINCT(ip0), COUNT_DISTINCT(ip1) +``` diff --git a/docs/reference/esql/functions/kibana/docs/mv_pseries_weighted_sum.md b/docs/reference/esql/functions/kibana/docs/mv_pseries_weighted_sum.md new file mode 100644 index 0000000000000..fbeb310449b9b --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/mv_pseries_weighted_sum.md @@ -0,0 +1,12 @@ + + +### MV_PSERIES_WEIGHTED_SUM +Converts a multivalued expression into a single-valued column by multiplying every element on the input list by its corresponding term in P-Series and computing the sum. + +``` +ROW a = [70.0, 45.0, 21.0, 21.0, 21.0] +| EVAL sum = MV_PSERIES_WEIGHTED_SUM(a, 1.5) +| KEEP sum +``` diff --git a/docs/reference/esql/functions/kibana/docs/values.md b/docs/reference/esql/functions/kibana/docs/values.md new file mode 100644 index 0000000000000..cba62fc27255e --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/values.md @@ -0,0 +1,13 @@ + + +### VALUES +Returns all values in a group as a multivalued field. The order of the returned values isn't guaranteed. If you need the values returned in order use <>. + +``` + FROM employees +| EVAL first_letter = SUBSTRING(first_name, 0, 1) +| STATS first_name=MV_SORT(VALUES(first_name)) BY first_letter +| SORT first_letter +``` diff --git a/docs/reference/esql/functions/kibana/docs/weighted_avg.md b/docs/reference/esql/functions/kibana/docs/weighted_avg.md new file mode 100644 index 0000000000000..6b0b2cc8cd287 --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/weighted_avg.md @@ -0,0 +1,14 @@ + + +### WEIGHTED_AVG +The weighted average of a numeric expression. + +``` +FROM employees +| STATS w_avg = WEIGHTED_AVG(salary, height) by languages +| EVAL w_avg = ROUND(w_avg) +| KEEP w_avg, languages +| SORT languages +``` diff --git a/docs/reference/esql/functions/layout/count.asciidoc b/docs/reference/esql/functions/layout/count.asciidoc new file mode 100644 index 0000000000000..8c16d74cde9a7 --- /dev/null +++ b/docs/reference/esql/functions/layout/count.asciidoc @@ -0,0 +1,15 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-count]] +=== `COUNT` + +*Syntax* + +[.text-center] +image::esql/functions/signature/count.svg[Embedded,opts=inline] + +include::../parameters/count.asciidoc[] +include::../description/count.asciidoc[] +include::../types/count.asciidoc[] +include::../examples/count.asciidoc[] diff --git a/docs/reference/esql/functions/layout/count_distinct.asciidoc b/docs/reference/esql/functions/layout/count_distinct.asciidoc new file mode 100644 index 0000000000000..2c9848186e806 --- /dev/null +++ b/docs/reference/esql/functions/layout/count_distinct.asciidoc @@ -0,0 +1,16 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-count_distinct]] +=== `COUNT_DISTINCT` + +*Syntax* + +[.text-center] +image::esql/functions/signature/count_distinct.svg[Embedded,opts=inline] + +include::../parameters/count_distinct.asciidoc[] +include::../description/count_distinct.asciidoc[] +include::../types/count_distinct.asciidoc[] +include::../examples/count_distinct.asciidoc[] +include::../appendix/count_distinct.asciidoc[] diff --git a/docs/reference/esql/functions/layout/mv_pseries_weighted_sum.asciidoc b/docs/reference/esql/functions/layout/mv_pseries_weighted_sum.asciidoc new file mode 100644 index 0000000000000..7c14ecbc3c935 --- /dev/null +++ b/docs/reference/esql/functions/layout/mv_pseries_weighted_sum.asciidoc @@ -0,0 +1,15 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-mv_pseries_weighted_sum]] +=== `MV_PSERIES_WEIGHTED_SUM` + +*Syntax* + +[.text-center] +image::esql/functions/signature/mv_pseries_weighted_sum.svg[Embedded,opts=inline] + +include::../parameters/mv_pseries_weighted_sum.asciidoc[] +include::../description/mv_pseries_weighted_sum.asciidoc[] +include::../types/mv_pseries_weighted_sum.asciidoc[] +include::../examples/mv_pseries_weighted_sum.asciidoc[] diff --git a/docs/reference/esql/functions/layout/values.asciidoc b/docs/reference/esql/functions/layout/values.asciidoc new file mode 100644 index 0000000000000..7d90d4314699a --- /dev/null +++ b/docs/reference/esql/functions/layout/values.asciidoc @@ -0,0 +1,18 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-values]] +=== `VALUES` + +preview::["Do not use `VALUES` on production environments. This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] + +*Syntax* + +[.text-center] +image::esql/functions/signature/values.svg[Embedded,opts=inline] + +include::../parameters/values.asciidoc[] +include::../description/values.asciidoc[] +include::../types/values.asciidoc[] +include::../examples/values.asciidoc[] +include::../appendix/values.asciidoc[] diff --git a/docs/reference/esql/functions/layout/weighted_avg.asciidoc b/docs/reference/esql/functions/layout/weighted_avg.asciidoc new file mode 100644 index 0000000000000..84db8f1269b94 --- /dev/null +++ b/docs/reference/esql/functions/layout/weighted_avg.asciidoc @@ -0,0 +1,15 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-weighted_avg]] +=== `WEIGHTED_AVG` + +*Syntax* + +[.text-center] +image::esql/functions/signature/weighted_avg.svg[Embedded,opts=inline] + +include::../parameters/weighted_avg.asciidoc[] +include::../description/weighted_avg.asciidoc[] +include::../types/weighted_avg.asciidoc[] +include::../examples/weighted_avg.asciidoc[] diff --git a/docs/reference/esql/functions/median-absolute-deviation.asciidoc b/docs/reference/esql/functions/median-absolute-deviation.asciidoc deleted file mode 100644 index b4f80cced06f6..0000000000000 --- a/docs/reference/esql/functions/median-absolute-deviation.asciidoc +++ /dev/null @@ -1,60 +0,0 @@ -[discrete] -[[esql-agg-median-absolute-deviation]] -=== `MEDIAN_ABSOLUTE_DEVIATION` - -*Syntax* - -[source,esql] ----- -MEDIAN_ABSOLUTE_DEVIATION(expression) ----- - -*Parameters* - -`expression`:: -Expression from which to return the median absolute deviation. - -*Description* - -Returns the median absolute deviation, a measure of variability. It is a robust -statistic, meaning that it is useful for describing data that may have outliers, -or may not be normally distributed. For such data it can be more descriptive -than standard deviation. - -It is calculated as the median of each data point's deviation from the median of -the entire sample. That is, for a random variable `X`, the median absolute -deviation is `median(|median(X) - X|)`. - -NOTE: Like <>, `MEDIAN_ABSOLUTE_DEVIATION` is - <>. - -[WARNING] -==== -`MEDIAN_ABSOLUTE_DEVIATION` is also {wikipedia}/Nondeterministic_algorithm[non-deterministic]. -This means you can get slightly different results using the same data. -==== - -*Example* - -[source.merge.styled,esql] ----- -include::{esql-specs}/stats_percentile.csv-spec[tag=median-absolute-deviation] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/stats_percentile.csv-spec[tag=median-absolute-deviation-result] -|=== - -The expression can use inline functions. For example, to calculate the the -median absolute deviation of the maximum values of a multivalued column, first -use `MV_MAX` to get the maximum value per row, and use the result with the -`MEDIAN_ABSOLUTE_DEVIATION` function: - -[source.merge.styled,esql] ----- -include::{esql-specs}/stats_percentile.csv-spec[tag=docsStatsMADNestedExpression] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/stats_percentile.csv-spec[tag=docsStatsMADNestedExpression-result] -|=== diff --git a/docs/reference/esql/functions/mv-functions.asciidoc b/docs/reference/esql/functions/mv-functions.asciidoc index 0f4f6233d446c..bd5f14cdd3557 100644 --- a/docs/reference/esql/functions/mv-functions.asciidoc +++ b/docs/reference/esql/functions/mv-functions.asciidoc @@ -18,6 +18,7 @@ * <> * <> * <> +* <> * <> * <> * <> @@ -34,6 +35,7 @@ include::layout/mv_last.asciidoc[] include::layout/mv_max.asciidoc[] include::layout/mv_median.asciidoc[] include::layout/mv_min.asciidoc[] +include::layout/mv_pseries_weighted_sum.asciidoc[] include::layout/mv_slice.asciidoc[] include::layout/mv_sort.asciidoc[] include::layout/mv_sum.asciidoc[] diff --git a/docs/reference/esql/functions/parameters/count.asciidoc b/docs/reference/esql/functions/parameters/count.asciidoc new file mode 100644 index 0000000000000..d470061a83e2e --- /dev/null +++ b/docs/reference/esql/functions/parameters/count.asciidoc @@ -0,0 +1,6 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`field`:: +Expression that outputs values to be counted. If omitted, equivalent to `COUNT(*)` (the number of rows). diff --git a/docs/reference/esql/functions/parameters/count_distinct.asciidoc b/docs/reference/esql/functions/parameters/count_distinct.asciidoc new file mode 100644 index 0000000000000..f84cf27c3e075 --- /dev/null +++ b/docs/reference/esql/functions/parameters/count_distinct.asciidoc @@ -0,0 +1,9 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`field`:: +Column or literal for which to count the number of distinct values. + +`precision`:: +Precision threshold. Refer to <>. The maximum supported value is 40000. Thresholds above this number will have the same effect as a threshold of 40000. The default value is 3000. diff --git a/docs/reference/esql/functions/parameters/mv_pseries_weighted_sum.asciidoc b/docs/reference/esql/functions/parameters/mv_pseries_weighted_sum.asciidoc new file mode 100644 index 0000000000000..3a828f1464824 --- /dev/null +++ b/docs/reference/esql/functions/parameters/mv_pseries_weighted_sum.asciidoc @@ -0,0 +1,9 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`number`:: +Multivalue expression. + +`p`:: +It is a constant number that represents the 'p' parameter in the P-Series. It impacts every element's contribution to the weighted sum. diff --git a/docs/reference/esql/functions/parameters/values.asciidoc b/docs/reference/esql/functions/parameters/values.asciidoc new file mode 100644 index 0000000000000..8903aa1a472a3 --- /dev/null +++ b/docs/reference/esql/functions/parameters/values.asciidoc @@ -0,0 +1,6 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`field`:: + diff --git a/docs/reference/esql/functions/parameters/weighted_avg.asciidoc b/docs/reference/esql/functions/parameters/weighted_avg.asciidoc new file mode 100644 index 0000000000000..280dd8ca347d4 --- /dev/null +++ b/docs/reference/esql/functions/parameters/weighted_avg.asciidoc @@ -0,0 +1,9 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`number`:: +A numeric value. + +`weight`:: +A numeric weight. diff --git a/docs/reference/esql/functions/signature/count.svg b/docs/reference/esql/functions/signature/count.svg new file mode 100644 index 0000000000000..9b19652b98788 --- /dev/null +++ b/docs/reference/esql/functions/signature/count.svg @@ -0,0 +1 @@ +COUNT(field) \ No newline at end of file diff --git a/docs/reference/esql/functions/signature/count_distinct.svg b/docs/reference/esql/functions/signature/count_distinct.svg new file mode 100644 index 0000000000000..a5b77da7c555a --- /dev/null +++ b/docs/reference/esql/functions/signature/count_distinct.svg @@ -0,0 +1 @@ +COUNT_DISTINCT(field,precision) \ No newline at end of file diff --git a/docs/reference/esql/functions/signature/mv_pseries_weighted_sum.svg b/docs/reference/esql/functions/signature/mv_pseries_weighted_sum.svg new file mode 100644 index 0000000000000..7e3b42161e52c --- /dev/null +++ b/docs/reference/esql/functions/signature/mv_pseries_weighted_sum.svg @@ -0,0 +1 @@ +MV_PSERIES_WEIGHTED_SUM(number,p) \ No newline at end of file diff --git a/docs/reference/esql/functions/signature/values.svg b/docs/reference/esql/functions/signature/values.svg new file mode 100644 index 0000000000000..0fa116ce1eb14 --- /dev/null +++ b/docs/reference/esql/functions/signature/values.svg @@ -0,0 +1 @@ +VALUES(field) \ No newline at end of file diff --git a/docs/reference/esql/functions/signature/weighted_avg.svg b/docs/reference/esql/functions/signature/weighted_avg.svg new file mode 100644 index 0000000000000..581e8cd79b0c2 --- /dev/null +++ b/docs/reference/esql/functions/signature/weighted_avg.svg @@ -0,0 +1 @@ +WEIGHTED_AVG(number,weight) \ No newline at end of file diff --git a/docs/reference/esql/functions/st_centroid_agg.asciidoc b/docs/reference/esql/functions/st_centroid_agg.asciidoc deleted file mode 100644 index c980560f8f198..0000000000000 --- a/docs/reference/esql/functions/st_centroid_agg.asciidoc +++ /dev/null @@ -1,25 +0,0 @@ -[discrete] -[[esql-agg-st-centroid]] -=== `ST_CENTROID_AGG` - -experimental::[] - -Calculate the spatial centroid over a field with spatial point geometry type. - -[source.merge.styled,esql] ----- -include::{esql-specs}/spatial.csv-spec[tag=st_centroid_agg-airports] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/spatial.csv-spec[tag=st_centroid_agg-airports-result] -|=== - -Supported types: - -[%header.monospaced.styled,format=dsv,separator=|] -|=== -v | result -geo_point | geo_point -cartesian_point | cartesian_point -|=== diff --git a/docs/reference/esql/functions/types/count.asciidoc b/docs/reference/esql/functions/types/count.asciidoc new file mode 100644 index 0000000000000..70e79d4899605 --- /dev/null +++ b/docs/reference/esql/functions/types/count.asciidoc @@ -0,0 +1,20 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +field | result +boolean | long +cartesian_point | long +datetime | long +double | long +geo_point | long +integer | long +ip | long +keyword | long +long | long +text | long +unsigned_long | long +version | long +|=== diff --git a/docs/reference/esql/functions/types/count_distinct.asciidoc b/docs/reference/esql/functions/types/count_distinct.asciidoc new file mode 100644 index 0000000000000..4b201d45732f1 --- /dev/null +++ b/docs/reference/esql/functions/types/count_distinct.asciidoc @@ -0,0 +1,44 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +field | precision | result +boolean | integer | long +boolean | long | long +boolean | unsigned_long | long +boolean | | long +datetime | integer | long +datetime | long | long +datetime | unsigned_long | long +datetime | | long +double | integer | long +double | long | long +double | unsigned_long | long +double | | long +integer | integer | long +integer | long | long +integer | unsigned_long | long +integer | | long +ip | integer | long +ip | long | long +ip | unsigned_long | long +ip | | long +keyword | integer | long +keyword | long | long +keyword | unsigned_long | long +keyword | | long +long | integer | long +long | long | long +long | unsigned_long | long +long | | long +text | integer | long +text | long | long +text | unsigned_long | long +text | | long +version | integer | long +version | long | long +version | unsigned_long | long +version | | long +|=== diff --git a/docs/reference/esql/functions/types/mv_pseries_weighted_sum.asciidoc b/docs/reference/esql/functions/types/mv_pseries_weighted_sum.asciidoc new file mode 100644 index 0000000000000..f28e61f17aa33 --- /dev/null +++ b/docs/reference/esql/functions/types/mv_pseries_weighted_sum.asciidoc @@ -0,0 +1,9 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +number | p | result +double | double | double +|=== diff --git a/docs/reference/esql/functions/types/values.asciidoc b/docs/reference/esql/functions/types/values.asciidoc new file mode 100644 index 0000000000000..705745d76dbab --- /dev/null +++ b/docs/reference/esql/functions/types/values.asciidoc @@ -0,0 +1,17 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +field | result +boolean | boolean +datetime | datetime +double | double +integer | integer +ip | ip +keyword | keyword +long | long +text | text +version | version +|=== diff --git a/docs/reference/esql/functions/types/weighted_avg.asciidoc b/docs/reference/esql/functions/types/weighted_avg.asciidoc new file mode 100644 index 0000000000000..55cc14eb453c3 --- /dev/null +++ b/docs/reference/esql/functions/types/weighted_avg.asciidoc @@ -0,0 +1,17 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +number | weight | result +double | double | double +double | integer | double +double | long | double +integer | double | double +integer | integer | double +integer | long | double +long | double | double +long | integer | double +long | long | double +|=== diff --git a/docs/reference/esql/functions/values.asciidoc b/docs/reference/esql/functions/values.asciidoc deleted file mode 100644 index f13338a572b36..0000000000000 --- a/docs/reference/esql/functions/values.asciidoc +++ /dev/null @@ -1,38 +0,0 @@ -[discrete] -[[esql-agg-values]] -=== `VALUES` - -preview::["Do not use `VALUES` on production environments. This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] - -*Syntax* - -[source,esql] ----- -VALUES(expression) ----- - -`expression`:: -Expression of any type except `geo_point`, `cartesian_point`, `geo_shape`, or `cartesian_shape`. - -*Description* - -Returns all values in a group as a multivalued field. The order of the returned values isn't guaranteed. -If you need the values returned in order use <>. - -WARNING: This can use a significant amount of memory and ES|QL doesn't yet - grow aggregations beyond memory. So this aggregation will work until - it is used to collect more values than can fit into memory. Once it - collects too many values it will fail the query with - a <>. - -*Example* - -[source.merge.styled,esql] ----- -include::{esql-specs}/string.csv-spec[tag=values-grouped] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/string.csv-spec[tag=values-grouped-result] -|=== - diff --git a/docs/reference/esql/functions/weighted-avg.asciidoc b/docs/reference/esql/functions/weighted-avg.asciidoc deleted file mode 100644 index 4f166801641df..0000000000000 --- a/docs/reference/esql/functions/weighted-avg.asciidoc +++ /dev/null @@ -1,35 +0,0 @@ -[discrete] -[[esql-agg-weighted-avg]] -=== `WEIGHTED_AVG` - -*Syntax* - -[source,esql] ----- -WEIGHTED_AVG(expression, weight) ----- - -`expression`:: -Numeric expression. - -`weight`:: -Numeric weight. - -*Description* - -The weighted average of a numeric expression. - -*Supported types* - -The result is always a `double` no matter the input type. - -*Examples* - -[source.merge.styled,esql] ----- -include::{esql-specs}/stats.csv-spec[tag=weighted-avg] ----- -[%header.monospaced.styled,format=dsv,separator=|] -|=== -include::{esql-specs}/stats.csv-spec[tag=weighted-avg-result] -|=== diff --git a/docs/reference/inference/service-cohere.asciidoc b/docs/reference/inference/service-cohere.asciidoc index 52d71e0bc02a5..84eae6e880617 100644 --- a/docs/reference/inference/service-cohere.asciidoc +++ b/docs/reference/inference/service-cohere.asciidoc @@ -131,6 +131,7 @@ Specify whether to return doc text within the results. `top_n`:: (Optional, integer) The number of most relevant documents to return, defaults to the number of the documents. +If this {infer} endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query. ===== + .`task_settings` for the `text_embedding` task type diff --git a/docs/reference/ingest/processors/geoip.asciidoc b/docs/reference/ingest/processors/geoip.asciidoc index 738ac234d6162..230146d483144 100644 --- a/docs/reference/ingest/processors/geoip.asciidoc +++ b/docs/reference/ingest/processors/geoip.asciidoc @@ -64,12 +64,12 @@ depend on what has been found and which properties were configured in `propertie * If the GeoIP2 Domain database is used, then the following fields may be added under the `target_field`: `ip`, and `domain`. The fields actually added depend on what has been found and which properties were configured in `properties`. * If the GeoIP2 ISP database is used, then the following fields may be added under the `target_field`: `ip`, `asn`, -`organization_name`, `network`, `isp`, `isp_organization`, `mobile_country_code`, and `mobile_network_code`. The fields actually added +`organization_name`, `network`, `isp`, `isp_organization_name`, `mobile_country_code`, and `mobile_network_code`. The fields actually added depend on what has been found and which properties were configured in `properties`. * If the GeoIP2 Enterprise database is used, then the following fields may be added under the `target_field`: `ip`, `country_iso_code`, `country_name`, `continent_code`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `timezone`, `location`, `asn`, `organization_name`, `network`, `hosting_provider`, `tor_exit_node`, `anonymous_vpn`, `anonymous`, `public_proxy`, -`residential_proxy`, `domain`, `isp`, `isp_organization`, `mobile_country_code`, `mobile_network_code`, `user_type`, and +`residential_proxy`, `domain`, `isp`, `isp_organization_name`, `mobile_country_code`, `mobile_network_code`, `user_type`, and `connection_type`. The fields actually added depend on what has been found and which properties were configured in `properties`. preview::["Do not use the GeoIP2 Anonymous IP, GeoIP2 Connection Type, GeoIP2 Domain, GeoIP2 ISP, and GeoIP2 Enterprise databases in production environments. This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] diff --git a/docs/reference/query-dsl/query-string-query.asciidoc b/docs/reference/query-dsl/query-string-query.asciidoc index 319ede7c4ac05..b45247ace3735 100644 --- a/docs/reference/query-dsl/query-string-query.asciidoc +++ b/docs/reference/query-dsl/query-string-query.asciidoc @@ -30,7 +30,7 @@ If you don't need to support a query syntax, consider using the syntax, use the <> query, which is less strict. ==== - + [[query-string-query-ex-request]] ==== Example request @@ -83,7 +83,7 @@ could be expensive. There is a limit on the number of fields times terms that can be queried at once. It is defined by the `indices.query.bool.max_clause_count` -<>, which defaults to 4096. +<>. ==== -- diff --git a/docs/reference/query-dsl/span-multi-term-query.asciidoc b/docs/reference/query-dsl/span-multi-term-query.asciidoc index aefb3e4b75eb5..5a5f0e1f5ff99 100644 --- a/docs/reference/query-dsl/span-multi-term-query.asciidoc +++ b/docs/reference/query-dsl/span-multi-term-query.asciidoc @@ -39,7 +39,8 @@ GET /_search -------------------------------------------------- WARNING: `span_multi` queries will hit too many clauses failure if the number of terms that match the query exceeds the -boolean query limit (defaults to 4096).To avoid an unbounded expansion you can set the <>. +To avoid an unbounded expansion you can set the <> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only, you can activate the <> field option of the `text` field instead. This will rewrite any prefix query on the field to a single term query that matches the indexed prefix. diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index b86339b905631..1b7376c21daab 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -209,6 +209,11 @@ GET /index/_search The `text_similarity_reranker` is a type of retriever that enhances search results by re-ranking documents based on semantic similarity to a specified inference text, using a machine learning model. +[TIP] +==== +Refer to <> for a high level overview of semantic reranking. +==== + ===== Prerequisites To use `text_similarity_reranker` you must first set up a `rerank` task using the <>. @@ -262,13 +267,13 @@ GET /index/_search "text_similarity_reranker": { "retriever": { "standard": { ... } - } - }, - "field": "text", - "inference_id": "my-cohere-rerank-model", - "inference_text": "Most famous landmark in Paris", - "rank_window_size": 100, - "min_score": 0.5 + }, + "field": "text", + "inference_id": "my-cohere-rerank-model", + "inference_text": "Most famous landmark in Paris", + "rank_window_size": 100, + "min_score": 0.5 + } } } ---- diff --git a/docs/reference/search/search-shards.asciidoc b/docs/reference/search/search-shards.asciidoc index b9646f4d37303..49045acf4c484 100644 --- a/docs/reference/search/search-shards.asciidoc +++ b/docs/reference/search/search-shards.asciidoc @@ -63,6 +63,7 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=preference] include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=routing] +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] [[search-shards-api-example]] ==== {api-examples-title} diff --git a/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc b/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc index 75c06aa953302..f25741fca0b8f 100644 --- a/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-reranking/semantic-reranking.asciidoc @@ -5,7 +5,7 @@ preview::[] [TIP] ==== -This overview focuses more on the high-level concepts and use cases for semantic reranking. For full implementation details on how to set up and use semantic reranking in {es}, see the <> in the Search API docs. +This overview focuses more on the high-level concepts and use cases for semantic reranking. For full implementation details on how to set up and use semantic reranking in {es}, see the <> in the Search API docs. ==== Rerankers improve the relevance of results from earlier-stage retrieval mechanisms. @@ -89,11 +89,16 @@ In {es}, semantic rerankers are implemented using the {es} <>. +. *Choose a reranking model*. +Currently you can: + +** Integrate directly with the <> using the `rerank` task type +** Integrate directly with the <> using the `rerank` task type +** Upload a model to {es} from Hugging Face with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland] +*** Then set up an <> with the `rerank` task type +. *Create a `rerank` task using the <>*. The Inference API creates an inference endpoint and configures your chosen machine learning model to perform the reranking task. -. Define a `text_similarity_reranker` retriever in your search request. +. *Define a `text_similarity_reranker` retriever in your search request*. The retriever syntax makes it simple to configure both the retrieval and reranking of search results in a single API call. .*Example search request* with semantic reranker @@ -127,20 +132,6 @@ POST _search // TEST[skip:TBD] ============== -[discrete] -[[semantic-reranking-types]] -==== Supported reranking types - -The following `text_similarity_reranker` model configuration options are available. - -*Text similarity with cross-encoder* - -This solution uses a hosted or 3rd party inference service which relies on a cross-encoder model. -The model receives the text fields from the _top-K_ documents, as well as the search query, and calculates scores directly, which are then used to rerank the documents. - -Used with the Cohere inference service rolled out in 8.13, turn on semantic reranking that works out of the box. -Check out our https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations/cohere/cohere-elasticsearch.ipynb[Python notebook] for using Cohere with {es}. - [discrete] [[semantic-reranking-learn-more]] ==== Learn more diff --git a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc index ae27b46d4b876..f74bc65e31bf0 100644 --- a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc @@ -11,8 +11,7 @@ IMPORTANT: For the easiest way to perform semantic search in the {stack}, refer The following examples use Cohere's `embed-english-v3.0` model, the `all-mpnet-base-v2` model from HuggingFace, and OpenAI's `text-embedding-ada-002` second generation embedding model. You can use any Cohere and OpenAI models, they are all supported by the {infer} API. -For a list of supported models available on HuggingFace, refer to -<>. +For a list of recommended models available on HuggingFace, refer to <>. Azure based examples use models available through https://ai.azure.com/explore/models?selectedTask=embeddings[Azure AI Studio] or https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models[Azure OpenAI]. @@ -40,8 +39,7 @@ include::{es-ref-dir}/tab-widgets/inference-api/infer-api-task-widget.asciidoc[] ==== Create the index mapping The mapping of the destination index - the index that contains the embeddings that the model will create based on your input text - must be created. -The destination index must have a field with the <> -field type to index the output of the used model. +The destination index must have a field with the <> field type for most models and the <> field type for the sparse vector models like in the case of the `elser` service to index the output of the used model. include::{es-ref-dir}/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc[] @@ -49,8 +47,7 @@ include::{es-ref-dir}/tab-widgets/inference-api/infer-api-mapping-widget.asciido [[infer-service-inference-ingest-pipeline]] ==== Create an ingest pipeline with an inference processor -Create an <> with an -<> and use the model you created above to infer against the data that is being ingested in the pipeline. +Create an <> with an <> and use the model you created above to infer against the data that is being ingested in the pipeline. include::{es-ref-dir}/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc[] @@ -103,8 +100,8 @@ POST _tasks//_cancel ==== Semantic search After the data set has been enriched with the embeddings, you can query the data using {ref}/knn-search.html#knn-semantic-search[semantic search]. -Pass a -`query_vector_builder` to the k-nearest neighbor (kNN) vector search API, and provide the query text and the model you have used to create the embeddings. +In case of dense vector models, pass a `query_vector_builder` to the k-nearest neighbor (kNN) vector search API, and provide the query text and the model you have used to create the embeddings. +In case of a sparse vector model like ELSER, use a `sparse_vector` query, and provide the query text with the model you have used to create the embeddings. NOTE: If you cancelled the reindexing process, you run the query only a part of the data which affects the quality of your results. diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc index 6039d1de5345b..997dbbe8a20e6 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc @@ -7,6 +7,12 @@ id="infer-api-ingest-cohere"> Cohere + + + + + - +