From 14a7b8fe67010e406a9c77bb084e593b743c42bb Mon Sep 17 00:00:00 2001 From: Kathleen DeRusso Date: Wed, 6 Nov 2024 14:42:06 -0500 Subject: [PATCH 01/11] Add documentation for query rules retriever (#115696) * Add initial query rules retriever docs * Add docs tests * Apply suggestions from code review Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> * PR feedback * Make query rules guide retriever-first * Add warning to DSL doc * Update docs/reference/search/retriever.asciidoc Co-authored-by: Mike Pellegrini * Update docs/reference/search/retriever.asciidoc Co-authored-by: Mike Pellegrini * Apply suggestions from code review Co-authored-by: Mike Pellegrini * Give parameters subheading an explicit id * Fix formatting --------- Co-authored-by: Elastic Machine Co-authored-by: Liam Thompson <32779855+leemthompo@users.noreply.github.com> Co-authored-by: Mike Pellegrini --- docs/reference/query-dsl/rule-query.asciidoc | 6 + docs/reference/search/retriever.asciidoc | 237 +++++++++++++++--- .../retrievers-overview.asciidoc | 59 +++-- .../search-using-query-rules.asciidoc | 76 +++++- 4 files changed, 303 insertions(+), 75 deletions(-) diff --git a/docs/reference/query-dsl/rule-query.asciidoc b/docs/reference/query-dsl/rule-query.asciidoc index dfedc2261bbde..43e79f656a55a 100644 --- a/docs/reference/query-dsl/rule-query.asciidoc +++ b/docs/reference/query-dsl/rule-query.asciidoc @@ -12,6 +12,12 @@ The old syntax using `rule_query` and `ruleset_id` is deprecated and will be removed in a future release, so it is strongly advised to migrate existing rule queries to the new API structure. ==== +[TIP] +==== +The rule query is not supported for use alongside reranking. +If you want to use query rules in conjunction with reranking, use the <> instead. +==== + Applies <> to the query before returning results. Query rules can be used to promote documents in the manner of a <> based on matching defined rules, or to identify specific documents to exclude from a contextual result set. If no matching query rules are defined, the "organic" matches for the query are returned. diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 9306d83c79136..74497c53c602c 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -1,14 +1,12 @@ [[retriever]] === Retriever -A retriever is a specification to describe top documents returned from a -search. A retriever replaces other elements of the <> +A retriever is a specification to describe top documents returned from a search. +A retriever replaces other elements of the <> that also return top documents such as <> and -<>. A retriever may have child retrievers where a -retriever with two or more children is considered a compound retriever. This -allows for complex behavior to be depicted in a tree-like structure, called -the retriever tree, to better clarify the order of operations that occur -during a search. +<>. +A retriever may have child retrievers where a retriever with two or more children is considered a compound retriever. +This allows for complex behavior to be depicted in a tree-like structure, called the retriever tree, which clarifies the order of operations that occur during a search. [TIP] ==== @@ -29,6 +27,9 @@ A <> that produces top documents from <> that enhances search results by re-ranking documents based on semantic similarity to a specified inference text, using a machine learning model. +`rule`:: +A <> that applies contextual <> to pin or exclude documents for specific queries. + [[standard-retriever]] ==== Standard Retriever @@ -44,8 +45,7 @@ Defines a query to retrieve a set of top documents. `filter`:: (Optional, <>) + -Applies a <> to this retriever -where all documents must match this query but do not contribute to the score. +Applies a <> to this retriever, where all documents must match this query but do not contribute to the score. `search_after`:: (Optional, <>) @@ -56,14 +56,13 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=terminate_after] `sort`:: + -(Optional, <>) -A sort object that that specifies the order of matching documents. +(Optional, <>) A sort object that specifies the order of matching documents. `min_score`:: (Optional, `float`) + -Minimum <> for matching documents. Documents with a -lower `_score` are not included in the top documents. +Minimum <> for matching documents. +Documents with a lower `_score` are not included in the top documents. `collapse`:: (Optional, <>) @@ -72,8 +71,7 @@ Collapses the top documents by a specified key into a single top document per ke ===== Restrictions -When a retriever tree contains a compound retriever (a retriever with two or more child -retrievers) the <> parameter is not supported. +When a retriever tree contains a compound retriever (a retriever with two or more child retrievers) the <> parameter is not supported. [discrete] [[standard-retriever-example]] @@ -105,12 +103,39 @@ POST /restaurants/_bulk?refresh {"region": "Austria", "year": "2020", "vector": [10, 22, 79]} {"index":{}} {"region": "France", "year": "2020", "vector": [10, 22, 80]} + +PUT /movies + +PUT _query_rules/my-ruleset +{ + "rules": [ + { + "rule_id": "my-rule1", + "type": "pinned", + "criteria": [ + { + "type": "exact", + "metadata": "query_string", + "values": [ "pugs" ] + } + ], + "actions": { + "ids": [ + "id1" + ] + } + } + ] +} + ---- // TESTSETUP [source,console] -------------------------------------------------- DELETE /restaurants + +DELETE /movies -------------------------------------------------- // TEARDOWN //// @@ -143,11 +168,13 @@ GET /restaurants/_search } } ---- + <1> Opens the `retriever` object. <2> The `standard` retriever is used for defining traditional {es} queries. <3> The entry point for defining the search query. <4> The `bool` object allows for combining multiple query clauses logically. -<5> The `should` array indicates conditions under which a document will match. Documents matching these conditions will increase their relevancy score. +<5> The `should` array indicates conditions under which a document will match. +Documents matching these conditions will have increased relevancy scores. <6> The `match` object finds documents where the `region` field contains the word "Austria." <7> The `filter` array provides filtering conditions that must be met but do not contribute to the relevancy score. <8> The `term` object is used for exact matches, in this case, filtering documents by the `year` field. @@ -178,8 +205,8 @@ Defines a <> to build a query vector. `k`:: (Required, integer) + -Number of nearest neighbors to return as top hits. This value must be fewer than -or equal to `num_candidates`. +Number of nearest neighbors to return as top hits. +This value must be fewer than or equal to `num_candidates`. `num_candidates`:: (Required, integer) @@ -222,16 +249,15 @@ GET /restaurants/_search <1> Configuration for k-nearest neighbor (knn) search, which is based on vector similarity. <2> Specifies the field name that contains the vectors. <3> The query vector against which document vectors are compared in the `knn` search. -<4> The number of nearest neighbors to return as top hits. This value must be fewer than or equal to `num_candidates`. +<4> The number of nearest neighbors to return as top hits. +This value must be fewer than or equal to `num_candidates`. <5> The size of the initial candidate set from which the final `k` nearest neighbors are selected. [[rrf-retriever]] ==== RRF Retriever -An <> retriever returns top documents based on the RRF formula, -equally weighting two or more child retrievers. -Reciprocal rank fusion (RRF) is a method for combining multiple result -sets with different relevance indicators into a single result set. +An <> retriever returns top documents based on the RRF formula, equally weighting two or more child retrievers. +Reciprocal rank fusion (RRF) is a method for combining multiple result sets with different relevance indicators into a single result set. ===== Parameters @@ -357,7 +383,8 @@ Refer to <> for a high level overview of semantic re-ranking ===== Prerequisites To use `text_similarity_reranker` you must first set up a `rerank` task using the <>. -The `rerank` task should be set up with a machine learning model that can compute text similarity. Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third-party text similarity models supported by {es}. +The `rerank` task should be set up with a machine learning model that can compute text similarity. +Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third-party text similarity models supported by {es}. Currently you can: @@ -368,6 +395,7 @@ Currently you can: ** Refer to the <> on this page for a step-by-step guide. ===== Parameters + `retriever`:: (Required, <>) + @@ -376,7 +404,8 @@ The child retriever that generates the initial set of top documents to be re-ran `field`:: (Required, `string`) + -The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the `inferenceText`. +The document field to be used for text similarity comparisons. +This field should contain the text that will be evaluated against the `inferenceText`. `inference_id`:: (Required, `string`) @@ -391,25 +420,28 @@ The text snippet used as the basis for similarity comparison. `rank_window_size`:: (Optional, `int`) + -The number of top documents to consider in the re-ranking process. Defaults to `10`. +The number of top documents to consider in the re-ranking process. +Defaults to `10`. `min_score`:: (Optional, `float`) + -Sets a minimum threshold score for including documents in the re-ranked results. Documents with similarity scores below this threshold will be excluded. Note that score calculations vary depending on the model used. +Sets a minimum threshold score for including documents in the re-ranked results. +Documents with similarity scores below this threshold will be excluded. +Note that score calculations vary depending on the model used. `filter`:: (Optional, <>) + Applies the specified <> to the child <>. -If the child retriever already specifies any filters, then this top-level filter is applied in conjuction -with the filter defined in the child retriever. +If the child retriever already specifies any filters, then this top-level filter is applied in conjuction with the filter defined in the child retriever. [discrete] [[text-similarity-reranker-retriever-example-cohere]] ==== Example: Cohere Rerank -This example enables out-of-the-box semantic search by re-ranking top documents using the Cohere Rerank API. This approach eliminate the need to generate and store embeddings for all indexed documents. +This example enables out-of-the-box semantic search by re-ranking top documents using the Cohere Rerank API. +This approach eliminates the need to generate and store embeddings for all indexed documents. This requires a <> using the `rerank` task type. [source,console] @@ -459,7 +491,9 @@ Follow these steps to load the model and create a semantic re-ranker. python -m pip install eland[pytorch] ---- + -. Upload the model to {es} using Eland. This example assumes you have an Elastic Cloud deployment and an API key. Refer to the https://www.elastic.co/guide/en/elasticsearch/client/eland/current/machine-learning.html#ml-nlp-pytorch-auth[Eland documentation] for more authentication options. +. Upload the model to {es} using Eland. +This example assumes you have an Elastic Cloud deployment and an API key. +Refer to the https://www.elastic.co/guide/en/elasticsearch/client/eland/current/machine-learning.html#ml-nlp-pytorch-auth[Eland documentation] for more authentication options. + [source,sh] ---- @@ -517,14 +551,142 @@ POST movies/_search This retriever uses a standard `match` query to search the `movie` index for films tagged with the genre "drama". It then re-ranks the results based on semantic similarity to the text in the `inference_text` parameter, using the model we uploaded to {es}. +[[rule-retriever]] +==== Query Rules Retriever + +The `rule` retriever enables fine-grained control over search results by applying contextual <> to pin or exclude documents for specific queries. +This retriever has similar functionality to the <>, but works out of the box with other retrievers. + +===== Prerequisites + +To use the `rule` retriever you must first create one or more query rulesets using the <>. + +[discrete] +[[rule-retriever-parameters]] +===== Parameters + +`retriever`:: +(Required, <>) ++ +The child retriever that returns the results to apply query rules on top of. +This can be a standalone retriever such as the <> or <> retriever, or it can be a compound retriever. + +`ruleset_ids`:: +(Required, `array`) ++ +An array of one or more unique <> IDs with query-based rules to match and apply as applicable. +Rulesets and their associated rules are evaluated in the order in which they are specified in the query and ruleset. +The maximum number of rulesets to specify is 10. + +`match_criteria`:: +(Required, `object`) ++ +Defines the match criteria to apply to rules in the given query ruleset(s). +Match criteria should match the keys defined in the `criteria.metadata` field of the rule. + +`rank_window_size`:: +(Optional, `int`) ++ +The number of top documents to return from the `rule` retriever. +Defaults to `10`. + +[discrete] +[[rule-retriever-example]] +==== Example: Rule retriever + +This example shows the rule retriever executed without any additional retrievers. +It runs the query defined by the `retriever` and applies the rules from `my-ruleset` on top of the returned results. + +[source,console] +---- +GET movies/_search +{ + "retriever": { + "rule": { + "match_criteria": { + "query_string": "harry potter" + }, + "ruleset_ids": [ + "my-ruleset" + ], + "retriever": { + "standard": { + "query": { + "query_string": { + "query": "harry potter" + } + } + } + } + } + } +} +---- + +[discrete] +[[rule-retriever-example-rrf]] +==== Example: Rule retriever combined with RRF + +This example shows how to combine the `rule` retriever with other rerank retrievers such as <> or <>. + +[WARNING] +==== +The `rule` retriever will apply rules to any documents returned from its defined `retriever` or any of its sub-retrievers. +This means that for the best results, the `rule` retriever should be the outermost defined retriever. +Nesting a `rule` retriever as a sub-retriever under a reranker such as `rrf` or `text_similarity_reranker` may not produce the expected results. +==== + +[source,console] +---- +GET movies/_search +{ + "retriever": { + "rule": { <1> + "match_criteria": { + "query_string": "harry potter" + }, + "ruleset_ids": [ + "my-ruleset" + ], + "retriever": { + "rrf": { <2> + "retrievers": [ + { + "standard": { + "query": { + "query_string": { + "query": "sorcerer's stone" + } + } + } + }, + { + "standard": { + "query": { + "query_string": { + "query": "chamber of secrets" + } + } + } + } + ] + } + } + } + } +} +---- + +<1> The `rule` retriever is the outermost retriever, applying rules to the search results that were previously reranked using the `rrf` retriever. +<2> The `rrf` retriever returns results from all of its sub-retrievers, and the output of the `rrf` retriever is used as input to the `rule` retriever. + ==== Using `from` and `size` with a retriever tree The <> and <> parameters are provided globally as part of the general -<>. They are applied to all retrievers in a -retriever tree unless a specific retriever overrides the `size` parameter -using a different parameter such as `rank_window_size`. Though, the final -search hits are always limited to `size`. +<>. +They are applied to all retrievers in a retriever tree, unless a specific retriever overrides the `size` parameter using a different parameter such as `rank_window_size`. +Though, the final search hits are always limited to `size`. ==== Using aggregations with a retriever tree @@ -534,8 +696,8 @@ clauses in a <>. ==== Restrictions on search parameters when specifying a retriever -When a retriever is specified as part of a search the following elements are not allowed -at the top-level and instead are only allowed as elements of specific retrievers: +When a retriever is specified as part of a search, the following elements are not allowed at the top-level. +Instead they are only allowed as elements of specific retrievers: * <> * <> @@ -543,3 +705,4 @@ at the top-level and instead are only allowed as elements of specific retrievers * <> * <> * <> + diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-overview.asciidoc index 9df4026fc6445..8e5955fc41782 100644 --- a/docs/reference/search/search-your-data/retrievers-overview.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-overview.asciidoc @@ -16,22 +16,21 @@ For implementation details, including notable restrictions, check out the Retrievers come in various types, each tailored for different search operations. The following retrievers are currently available: -* <>. Returns top documents from a -traditional https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl.html[query]. -Mimics a traditional query but in the context of a retriever framework. This -ensures backward compatibility as existing `_search` requests remain supported. -That way you can transition to the new abstraction at your own pace without -mixing syntaxes. -* <>. Returns top documents from a <>, -in the context of a retriever framework. -* <>. Combines and ranks multiple first-stage retrievers using -the reciprocal rank fusion (RRF) algorithm. Allows you to combine multiple result sets -with different relevance indicators into a single result set. -An RRF retriever is a *compound retriever*, where its `filter` element is -propagated to its sub retrievers. -+ - -* <>. Used for <>. +* <>. +Returns top documents from a traditional https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl.html[query]. +Mimics a traditional query but in the context of a retriever framework. +This ensures backward compatibility as existing `_search` requests remain supported. +That way you can transition to the new abstraction at your own pace without mixing syntaxes. +* <>. +Returns top documents from a <>, in the context of a retriever framework. +* <>. +Combines and ranks multiple first-stage retrievers using the reciprocal rank fusion (RRF) algorithm. +Allows you to combine multiple result sets with different relevance indicators into a single result set. +An RRF retriever is a *compound retriever*, where its `filter` element is propagated to its sub retrievers. +* <>. +Applies <> to the query before returning results. +* <>. +Used for <>. Requires first creating a `rerank` task using the <>. [discrete] @@ -69,8 +68,11 @@ When using compound retrievers, only the query element is allowed, which enforce [[retrievers-overview-example]] ==== Example -The following example demonstrates the powerful queries that we can now compose, and how retrievers simplify this process. We can use any combination of retrievers we want, propagating the -results of a nested retriever to its parent. In this scenario, we'll make use of all 4 (currently) available retrievers, i.e. `standard`, `knn`, `text_similarity_reranker` and `rrf`. +The following example demonstrates the powerful queries that we can now compose, and how retrievers simplify this process. +We can use any combination of retrievers we want, propagating the results of a nested retriever to its parent. +In this scenario, we'll make use of 4 of our currently available retrievers, i.e. `standard`, `knn`, `text_similarity_reranker` and `rrf`. +See <> for the complete list of available retrievers. + We'll first combine the results of a `semantic` query using the `standard` retriever, and that of a `knn` search on a dense vector field, using `rrf` to get the top 100 results. Finally, we'll then rerank the top-50 results of `rrf` using the `text_similarity_reranker` @@ -126,15 +128,18 @@ GET example-index/_search Here are some important terms: -* *Retrieval Pipeline*. Defines the entire retrieval and ranking logic to -produce top hits. -* *Retriever Tree*. A hierarchical structure that defines how retrievers interact. -* *First-stage Retriever*. Returns an initial set of candidate documents. -* *Compound Retriever*. Builds on one or more retrievers, -enhancing document retrieval and ranking logic. -* *Combiners*. Compound retrievers that merge top hits -from multiple sub-retrievers. -* *Rerankers*. Special compound retrievers that reorder hits and may adjust the number of hits, with distinctions between first-stage and second-stage rerankers. +* *Retrieval Pipeline*. +Defines the entire retrieval and ranking logic to produce top hits. +* *Retriever Tree*. +A hierarchical structure that defines how retrievers interact. +* *First-stage Retriever*. +Returns an initial set of candidate documents. +* *Compound Retriever*. +Builds on one or more retrievers, enhancing document retrieval and ranking logic. +* *Combiners*. +Compound retrievers that merge top hits from multiple sub-retrievers. +* *Rerankers*. +Special compound retrievers that reorder hits and may adjust the number of hits, with distinctions between first-stage and second-stage rerankers. [discrete] [[retrievers-overview-play-in-search]] diff --git a/docs/reference/search/search-your-data/search-using-query-rules.asciidoc b/docs/reference/search/search-your-data/search-using-query-rules.asciidoc index 18be825d02376..7d9d14684beee 100644 --- a/docs/reference/search/search-your-data/search-using-query-rules.asciidoc +++ b/docs/reference/search/search-your-data/search-using-query-rules.asciidoc @@ -10,7 +10,7 @@ _Query rules_ allow customization of search results for queries that match speci This allows for more control over results, for example ensuring that promoted documents that match defined criteria are returned at the top of the result list. Metadata is defined in the query rule, and is matched against the query criteria. Query rules use metadata to match a query. -Metadata is provided as part of the <> as an object and can be anything that helps differentiate the query, for example: +Metadata is provided as part of the search request as an object and can be anything that helps differentiate the query, for example: * A user-entered query string * Personalized metadata about users (e.g. country, language, etc) @@ -18,13 +18,13 @@ Metadata is provided as part of the <> as an o * A referring site * etc. -Query rules define a metadata key that will be used to match the metadata provided in the <> with the criteria specified in the rule. +Query rules define a metadata key that will be used to match the metadata provided in the <> with the criteria specified in the rule. -When a query rule matches the <> metadata according to its defined criteria, the query rule action is applied to the underlying `organic` query. +When a query rule matches the rule metadata according to its defined criteria, the query rule action is applied to the underlying `organic` query. For example, a query rule could be defined to match a user-entered query string of `pugs` and a country `us` and promote adoptable shelter dogs if the rule query met both criteria. -Rules are defined using the <> and searched using the <>. +Rules are defined using the <> and searched using the <> or the <>. [discrete] [[query-rule-definition]] @@ -189,9 +189,11 @@ You can use the <> call to retrieve the ruleset you just crea [discrete] [[rule-query-search]] -==== Perform a rule query +==== Search using query rules + +Once you have defined one or more query rulesets, you can search using these rulesets using the <> or the <>. +Retrievers are the recommended way to use rule queries, as they will work out of the box with other reranking retrievers such as <>. -Once you have defined one or more query rulesets, you can search these rulesets using the <> query. Rulesets are evaluated in order, so rules in the first ruleset you specify will be applied before any subsequent rulesets. An example query for the `my-ruleset` defined above is: @@ -200,18 +202,22 @@ An example query for the `my-ruleset` defined above is: ---- GET /my-index-000001/_search { - "query": { + "retriever": { "rule": { - "organic": { - "query_string": { - "query": "puggles" + "retriever": { + "standard": { + "query": { + "query_string": { + "query": "puggles" + } + } } }, "match_criteria": { "query_string": "puggles", "user_country": "us" }, - "ruleset_ids": ["my-ruleset"] + "ruleset_ids": [ "my-ruleset" ] } } } @@ -227,3 +233,51 @@ In this case, the rules are applied in the following order: - Where the matching rule appears in the ruleset - If multiple documents are specified in a single rule, in the order they are specified - If a document is matched by both a `pinned` rule and an `exclude` rule, the `exclude` rule will take precedence + +You can specify reranking retrievers such as <> or <> in the rule query to apply query rules on already-reranked results. +Here is an example: + +[source,console] +---- +GET my-index-000001/_search +{ + "retriever": { + "rule": { + "match_criteria": { + "query_string": "puggles", + "user_country": "us" + }, + "ruleset_ids": [ + "my-ruleset" + ], + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "query_string": { + "query": "pugs" + } + } + } + }, + { + "standard": { + "query": { + "query_string": { + "query": "puggles" + } + } + } + } + ] + } + } + } + } +} +---- +// TEST[continued] + +This will apply pinned and excluded query rules on top of the content that was reranked by RRF. From 0e044d70db20d67e0bc43d7671b4cfcffbcc4bae Mon Sep 17 00:00:00 2001 From: Fang Xing <155562079+fang-xing-esql@users.noreply.github.com> Date: Wed, 6 Nov 2024 15:23:53 -0500 Subject: [PATCH 02/11] [ES|QL] Verify aggregation filter's type is boolean to avoid class_cast_exception (#116274) * validate agg filter's type is boolean --- docs/changelog/116274.yaml | 5 +++++ .../elasticsearch/xpack/esql/analysis/Verifier.java | 5 +++++ .../xpack/esql/analysis/VerifierTests.java | 13 +++++++++++++ 3 files changed, 23 insertions(+) create mode 100644 docs/changelog/116274.yaml diff --git a/docs/changelog/116274.yaml b/docs/changelog/116274.yaml new file mode 100644 index 0000000000000..9d506c7725afd --- /dev/null +++ b/docs/changelog/116274.yaml @@ -0,0 +1,5 @@ +pr: 116274 +summary: "[ES|QL] Verify aggregation filter's type is boolean to avoid `class_cast_exception`" +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 01af8adbfca67..0fa2a69438358 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -69,6 +69,7 @@ import static org.elasticsearch.xpack.esql.common.Failure.fail; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; +import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; /** * This class is part of the planner. Responsible for failing impossible queries with a human-readable error message. In particular, this @@ -315,6 +316,10 @@ private static void checkInvalidNamedExpressionUsage( Expression filter = fe.filter(); failures.add(fail(filter, "WHERE clause allowed only for aggregate functions, none found in [{}]", fe.sourceText())); } + Expression f = fe.filter(); // check the filter has to be a boolean term, similar as checkFilterConditionType + if (f.dataType() != NULL && f.dataType() != BOOLEAN) { + failures.add(fail(f, "Condition expression needs to be boolean, found [{}]", f.dataType())); + } // but that the filter doesn't use grouping or aggregate functions fe.filter().forEachDown(c -> { if (c instanceof AggregateFunction af) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index acf0770cc910d..d6cda4a3a9ff7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -394,6 +394,19 @@ public void testAggFilterOnBucketingOrAggFunctions() { assertEquals("1:60: Unknown column [m]", error("from test | stats m = max(languages), min(languages) WHERE m + 2 > 1 by emp_no")); } + public void testAggWithNonBooleanFilter() { + for (String filter : List.of("\"true\"", "1", "1 + 0", "concat(\"a\", \"b\")")) { + String type = (filter.equals("1") || filter.equals("1 + 0")) ? "INTEGER" : "KEYWORD"; + assertEquals("1:19: Condition expression needs to be boolean, found [" + type + "]", error("from test | where " + filter)); + for (String by : List.of("", " by languages", " by bucket(salary, 10)")) { + assertEquals( + "1:34: Condition expression needs to be boolean, found [" + type + "]", + error("from test | stats count(*) where " + filter + by) + ); + } + } + } + public void testGroupingInsideAggsAsAgg() { assertEquals( "1:18: can only use grouping function [bucket(emp_no, 5.)] part of the BY clause", From 27a94e22fb3c8683f1a31dbb86659431deebac6c Mon Sep 17 00:00:00 2001 From: Mark Vieira Date: Wed, 6 Nov 2024 12:24:08 -0800 Subject: [PATCH 03/11] Unmute ArchiveTests --- muted-tests.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index e351add36d757..e26356f832038 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -264,18 +264,12 @@ tests: - class: org.elasticsearch.xpack.security.operator.OperatorPrivilegesIT method: testEveryActionIsEitherOperatorOnlyOrNonOperator issue: https://github.com/elastic/elasticsearch/issues/102992 -- class: org.elasticsearch.packaging.test.ArchiveTests - method: test40AutoconfigurationNotTriggeredWhenNodeIsMeantToJoinExistingCluster - issue: https://github.com/elastic/elasticsearch/issues/116289 - class: org.elasticsearch.datastreams.DataStreamsClientYamlTestSuiteIT issue: https://github.com/elastic/elasticsearch/issues/116291 - class: org.elasticsearch.xpack.test.rest.XPackRestIT issue: https://github.com/elastic/elasticsearch/issues/114723 - class: org.elasticsearch.xpack.search.AsyncSearchSecurityIT issue: https://github.com/elastic/elasticsearch/issues/116293 -- class: org.elasticsearch.packaging.test.ArchiveTests - method: test43AutoconfigurationNotTriggeredWhenTlsAlreadyConfigured - issue: https://github.com/elastic/elasticsearch/issues/116317 - class: org.elasticsearch.xpack.downsample.DownsampleRestIT issue: https://github.com/elastic/elasticsearch/issues/116326 - class: org.elasticsearch.xpack.downsample.DownsampleWithBasicRestIT From 9829cd22e922d32105de4bde6d48366f09d4ecf4 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 7 Nov 2024 08:24:48 +1100 Subject: [PATCH 04/11] Mute org.elasticsearch.xpack.shutdown.NodeShutdownIT testAllocationPreventedForRemoval #116363 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index e26356f832038..529f5d75ad0ec 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -282,6 +282,9 @@ tests: - class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT method: test {p0=synonyms/90_synonyms_reloading_for_synset/Reload analyzers for specific synonym set} issue: https://github.com/elastic/elasticsearch/issues/116332 +- class: org.elasticsearch.xpack.shutdown.NodeShutdownIT + method: testAllocationPreventedForRemoval + issue: https://github.com/elastic/elasticsearch/issues/116363 # Examples: # From dc597847d0c10f575867f93079e2fc8b1c46c2cd Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 6 Nov 2024 17:34:18 -0500 Subject: [PATCH 05/11] ESQL: Allow duplicate warnings in some tests (#116199) This fixes a test, actually in serverless Elasticsearch, that gets duplicate warnings. We'd like not to emit these duplicate warnings, but at this point it isn't worth it. So, for now, in some tests we allow duplicate warnings. In most of our tests we do not allow duplicate warnings so that we don't make *more* duplicate warnings without thinking about it. --- .../esql/qa/mixed/MixedClusterEsqlSpecIT.java | 11 +++ .../xpack/esql/qa/rest/EsqlSpecTestCase.java | 32 ++++++--- .../xpack/esql/qa/rest/RestEsqlTestCase.java | 67 +++++++------------ .../xpack/esql/AssertWarnings.java | 52 ++++++++++++++ .../xpack/esql/CsvSpecReader.java | 20 ++++++ .../xpack/esql/EsqlTestUtils.java | 14 ---- .../elasticsearch/xpack/esql/CsvTests.java | 2 +- 7 files changed, 133 insertions(+), 65 deletions(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/AssertWarnings.java diff --git a/x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java b/x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java index 0e23b29172c32..801e1d12b1d4a 100644 --- a/x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java +++ b/x-pack/plugin/esql/qa/server/mixed-cluster/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/mixed/MixedClusterEsqlSpecIT.java @@ -91,4 +91,15 @@ protected boolean enableRoundingDoubleValuesOnAsserting() { protected boolean supportsInferenceTestService() { return false; } + + @Override + protected boolean deduplicateExactWarnings() { + /* + * In ESQL's main tests we shouldn't have to deduplicate but in + * serverless, where we reuse this test case exactly with *slightly* + * different configuration, we must deduplicate. So we do it here. + * It's a bit of a loss of precision, but that's ok. + */ + return true; + } } diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java index 57f58fc448822..6ebf05755ef5e 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/EsqlSpecTestCase.java @@ -26,6 +26,7 @@ import org.elasticsearch.test.rest.ESRestTestCase; import org.elasticsearch.test.rest.TestFeatureService; import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.esql.AssertWarnings; import org.elasticsearch.xpack.esql.CsvSpecReader.CsvTestCase; import org.elasticsearch.xpack.esql.CsvTestUtils; import org.elasticsearch.xpack.esql.EsqlTestUtils; @@ -47,7 +48,6 @@ import java.util.Locale; import java.util.Map; import java.util.TreeMap; -import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.IntStream; import java.util.stream.LongStream; @@ -230,7 +230,7 @@ protected final void doTest() throws Throwable { builder.tables(tables()); } - Map answer = runEsql(builder.query(testCase.query), testCase.expectedWarnings(), testCase.expectedWarningsRegex()); + Map answer = runEsql(builder.query(testCase.query), testCase.assertWarnings(deduplicateExactWarnings())); var expectedColumnsWithValues = loadCsvSpecValues(testCase.expectedResults); @@ -248,16 +248,30 @@ protected final void doTest() throws Throwable { assertResults(expectedColumnsWithValues, actualColumns, actualValues, testCase.ignoreOrder, logger); } - private Map runEsql( - RequestObjectBuilder requestObject, - List expectedWarnings, - List expectedWarningsRegex - ) throws IOException { + /** + * Should warnings be de-duplicated before checking for exact matches. Defaults + * to {@code false}, but in some environments we emit duplicate warnings. We'd prefer + * not to emit duplicate warnings but for now it isn't worth fighting with. So! In + * those environments we override this to deduplicate. + *

+ * Note: This only applies to warnings declared as {@code warning:}. Those + * declared as {@code warningRegex:} are always a list of + * allowed warnings. {@code warningRegex:} matches 0 or more + * warnings. There is no need to deduplicate because there's no expectation + * of an exact match. + *

+ * + */ + protected boolean deduplicateExactWarnings() { + return false; + } + + private Map runEsql(RequestObjectBuilder requestObject, AssertWarnings assertWarnings) throws IOException { if (mode == Mode.ASYNC) { assert supportsAsync(); - return RestEsqlTestCase.runEsqlAsync(requestObject, expectedWarnings, expectedWarningsRegex); + return RestEsqlTestCase.runEsqlAsync(requestObject, assertWarnings); } else { - return RestEsqlTestCase.runEsqlSync(requestObject, expectedWarnings, expectedWarningsRegex); + return RestEsqlTestCase.runEsqlSync(requestObject, assertWarnings); } } diff --git a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java index ef1e77280d0ee..505ab3adc553b 100644 --- a/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java +++ b/x-pack/plugin/esql/qa/server/src/main/java/org/elasticsearch/xpack/esql/qa/rest/RestEsqlTestCase.java @@ -31,6 +31,7 @@ import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.esql.AssertWarnings; import org.elasticsearch.xpack.esql.EsqlTestUtils; import org.elasticsearch.xpack.esql.action.EsqlCapabilities; import org.junit.After; @@ -53,7 +54,6 @@ import java.util.Map; import java.util.Set; import java.util.function.IntFunction; -import java.util.regex.Pattern; import static java.util.Collections.emptySet; import static java.util.Map.entry; @@ -83,9 +83,6 @@ public abstract class RestEsqlTestCase extends ESRestTestCase { private static final Logger LOGGER = LogManager.getLogger(RestEsqlTestCase.class); - private static final List NO_WARNINGS = List.of(); - private static final List NO_WARNINGS_REGEX = List.of(); - private static final String MAPPING_ALL_TYPES; static { @@ -379,7 +376,7 @@ public void testCSVNoHeaderMode() throws IOException { options.addHeader("Content-Type", mediaType); options.addHeader("Accept", "text/csv; header=absent"); request.setOptions(options); - HttpEntity entity = performRequest(request, NO_WARNINGS, NO_WARNINGS_REGEX); + HttpEntity entity = performRequest(request, new AssertWarnings.NoWarnings()); String actual = Streams.copyToString(new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8)); assertEquals("keyword0,0\r\n", actual); } @@ -430,11 +427,13 @@ public void testOutOfRangeComparisons() throws IOException { for (String truePredicate : trueForSingleValuesPredicates) { String comparison = fieldWithType + truePredicate; var query = requestObjectBuilder().query(format(null, "from {} | where {}", testIndexName(), comparison)); - List expectedWarnings = List.of( - "Line 1:29: evaluation of [" + comparison + "] failed, treating result as null. Only first 20 failures recorded.", - "Line 1:29: java.lang.IllegalArgumentException: single-value function encountered multi-value" + AssertWarnings assertWarnings = new AssertWarnings.ExactStrings( + List.of( + "Line 1:29: evaluation of [" + comparison + "] failed, treating result as null. Only first 20 failures recorded.", + "Line 1:29: java.lang.IllegalArgumentException: single-value function encountered multi-value" + ) ); - var result = runEsql(query, expectedWarnings, NO_WARNINGS_REGEX, mode); + var result = runEsql(query, assertWarnings, mode); var values = as(result.get("values"), ArrayList.class); assertThat( @@ -504,7 +503,7 @@ public void testInternalRange() throws IOException { for (String p : predicates) { var query = requestObjectBuilder().query(format(null, "from {} | where {}", testIndexName(), p)); - var result = runEsql(query, List.of(), NO_WARNINGS_REGEX, mode); + var result = runEsql(query, new AssertWarnings.NoWarnings(), mode); var values = as(result.get("values"), ArrayList.class); assertThat( format(null, "Comparison [{}] should return all rows with single values.", p), @@ -996,35 +995,26 @@ private static String expectedTextBody(String format, int count, @Nullable Chara } public Map runEsql(RequestObjectBuilder requestObject) throws IOException { - return runEsql(requestObject, NO_WARNINGS, NO_WARNINGS_REGEX, mode); + return runEsql(requestObject, new AssertWarnings.NoWarnings(), mode); } public static Map runEsqlSync(RequestObjectBuilder requestObject) throws IOException { - return runEsqlSync(requestObject, NO_WARNINGS, NO_WARNINGS_REGEX); + return runEsqlSync(requestObject, new AssertWarnings.NoWarnings()); } public static Map runEsqlAsync(RequestObjectBuilder requestObject) throws IOException { - return runEsqlAsync(requestObject, NO_WARNINGS, NO_WARNINGS_REGEX); + return runEsqlAsync(requestObject, new AssertWarnings.NoWarnings()); } - static Map runEsql( - RequestObjectBuilder requestObject, - List expectedWarnings, - List expectedWarningsRegex, - Mode mode - ) throws IOException { + static Map runEsql(RequestObjectBuilder requestObject, AssertWarnings assertWarnings, Mode mode) throws IOException { if (mode == ASYNC) { - return runEsqlAsync(requestObject, expectedWarnings, expectedWarningsRegex); + return runEsqlAsync(requestObject, assertWarnings); } else { - return runEsqlSync(requestObject, expectedWarnings, expectedWarningsRegex); + return runEsqlSync(requestObject, assertWarnings); } } - public static Map runEsqlSync( - RequestObjectBuilder requestObject, - List expectedWarnings, - List expectedWarningsRegex - ) throws IOException { + public static Map runEsqlSync(RequestObjectBuilder requestObject, AssertWarnings assertWarnings) throws IOException { requestObject.build(); Request request = prepareRequest(SYNC); String mediaType = attachBody(requestObject, request); @@ -1040,15 +1030,11 @@ public static Map runEsqlSync( } request.setOptions(options); - HttpEntity entity = performRequest(request, expectedWarnings, expectedWarningsRegex); + HttpEntity entity = performRequest(request, assertWarnings); return entityToMap(entity, requestObject.contentType()); } - public static Map runEsqlAsync( - RequestObjectBuilder requestObject, - List expectedWarnings, - List expectedWarningsRegex - ) throws IOException { + public static Map runEsqlAsync(RequestObjectBuilder requestObject, AssertWarnings assertWarnings) throws IOException { addAsyncParameters(requestObject); requestObject.build(); Request request = prepareRequest(ASYNC); @@ -1088,7 +1074,7 @@ public static Map runEsqlAsync( assertThat(response.getHeader("X-Elasticsearch-Async-Id"), nullValue()); assertThat(response.getHeader("X-Elasticsearch-Async-Is-Running"), is("?0")); } - assertWarnings(response, expectedWarnings, expectedWarningsRegex); + assertWarnings(response, assertWarnings); json.remove("is_running"); // remove this to not mess up later map assertions return Collections.unmodifiableMap(json); } else { @@ -1098,7 +1084,7 @@ public static Map runEsqlAsync( if (isRunning == false) { // must have completed immediately so keep_on_completion must be true assertThat(requestObject.keepOnCompletion(), is(true)); - assertWarnings(response, expectedWarnings, expectedWarningsRegex); + assertWarnings(response, assertWarnings); // we already have the results, but let's remember them so that we can compare to async get initialColumns = json.get("columns"); initialValues = json.get("values"); @@ -1128,7 +1114,7 @@ public static Map runEsqlAsync( assertEquals(initialValues, result.get("values")); } - assertWarnings(response, expectedWarnings, expectedWarningsRegex); + assertWarnings(response, assertWarnings); assertDeletable(id); return removeAsyncProperties(result); } @@ -1202,7 +1188,7 @@ static String runEsqlAsTextWithFormat(RequestObjectBuilder builder, String forma } request.setOptions(options); - HttpEntity entity = performRequest(request, NO_WARNINGS, NO_WARNINGS_REGEX); + HttpEntity entity = performRequest(request, new AssertWarnings.NoWarnings()); return Streams.copyToString(new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8)); } @@ -1235,9 +1221,8 @@ private static String attachBody(RequestObjectBuilder requestObject, Request req return mediaType; } - private static HttpEntity performRequest(Request request, List allowedWarnings, List allowedWarningsRegex) - throws IOException { - return assertWarnings(performRequest(request), allowedWarnings, allowedWarningsRegex); + private static HttpEntity performRequest(Request request, AssertWarnings assertWarnings) throws IOException { + return assertWarnings(performRequest(request), assertWarnings); } private static Response performRequest(Request request) throws IOException { @@ -1250,13 +1235,13 @@ private static Response performRequest(Request request) throws IOException { return response; } - private static HttpEntity assertWarnings(Response response, List allowedWarnings, List allowedWarningsRegex) { + private static HttpEntity assertWarnings(Response response, AssertWarnings assertWarnings) { List warnings = new ArrayList<>(response.getWarnings()); warnings.removeAll(mutedWarnings()); if (shouldLog()) { LOGGER.info("RESPONSE warnings (after muted)={}", warnings); } - EsqlTestUtils.assertWarnings(warnings, allowedWarnings, allowedWarningsRegex); + assertWarnings.assertWarnings(warnings); return response.getEntity(); } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/AssertWarnings.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/AssertWarnings.java new file mode 100644 index 0000000000000..f606d36ee6b6c --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/AssertWarnings.java @@ -0,0 +1,52 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql; + +import java.util.List; +import java.util.regex.Pattern; + +import static org.elasticsearch.test.ListMatcher.matchesList; +import static org.elasticsearch.test.MapMatcher.assertMap; +import static org.junit.Assert.assertTrue; + +/** + * How should we assert the warnings returned by ESQL. + */ +public interface AssertWarnings { + void assertWarnings(List warnings); + + record NoWarnings() implements AssertWarnings { + @Override + public void assertWarnings(List warnings) { + assertMap(warnings.stream().sorted().toList(), matchesList()); + } + } + + record ExactStrings(List expected) implements AssertWarnings { + @Override + public void assertWarnings(List warnings) { + assertMap(warnings.stream().sorted().toList(), matchesList(expected.stream().sorted().toList())); + } + } + + record DeduplicatedStrings(List expected) implements AssertWarnings { + @Override + public void assertWarnings(List warnings) { + assertMap(warnings.stream().sorted().distinct().toList(), matchesList(expected.stream().sorted().toList())); + } + } + + record AllowedRegexes(List expected) implements AssertWarnings { + @Override + public void assertWarnings(List warnings) { + for (String warning : warnings) { + assertTrue("Unexpected warning: " + warning, expected.stream().anyMatch(x -> x.matcher(warning).matches())); + } + } + } +} diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvSpecReader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvSpecReader.java index 781ae5531c6f0..84e06e0c1b674 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvSpecReader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvSpecReader.java @@ -142,6 +142,26 @@ public void adjustExpectedWarnings(Function updater) { public List expectedWarningsRegex() { return expectedWarningsRegex; } + + /** + * How should we assert the warnings returned by ESQL. + * @param deduplicateExact Should tests configured with {@code warnings:} deduplicate + * the warnings before asserting? Normally don't do it because + * duplicate warnings are lame. We'd like to fix them all. But + * in multi-node and multi-shard tests we can emit duplicate + * warnings and it isn't worth fixing them now. + */ + public AssertWarnings assertWarnings(boolean deduplicateExact) { + if (expectedWarnings.isEmpty() == false) { + return deduplicateExact + ? new AssertWarnings.DeduplicatedStrings(expectedWarnings) + : new AssertWarnings.ExactStrings(expectedWarnings); + } + if (expectedWarningsRegex.isEmpty() == false) { + return new AssertWarnings.AllowedRegexes(expectedWarningsRegex); + } + return new AssertWarnings.NoWarnings(); + } } } diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java index e755ddb4d0d10..c5c3788fbce47 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/EsqlTestUtils.java @@ -97,7 +97,6 @@ import java.util.Set; import java.util.TreeMap; import java.util.jar.JarInputStream; -import java.util.regex.Pattern; import java.util.zip.ZipEntry; import static java.util.Collections.emptyList; @@ -118,8 +117,6 @@ import static org.elasticsearch.test.ESTestCase.randomMillisUpToYear9999; import static org.elasticsearch.test.ESTestCase.randomShort; import static org.elasticsearch.test.ESTestCase.randomZone; -import static org.elasticsearch.test.ListMatcher.matchesList; -import static org.elasticsearch.test.MapMatcher.assertMap; import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY; import static org.elasticsearch.xpack.esql.core.type.DataType.INTEGER; import static org.elasticsearch.xpack.esql.core.type.DataType.NULL; @@ -129,7 +126,6 @@ import static org.elasticsearch.xpack.esql.parser.ParserUtils.ParamClassification.PATTERN; import static org.elasticsearch.xpack.esql.parser.ParserUtils.ParamClassification.VALUE; import static org.hamcrest.Matchers.instanceOf; -import static org.junit.Assert.assertTrue; public final class EsqlTestUtils { @@ -407,16 +403,6 @@ public static String randomEnrichCommand(String name, Enrich.Mode mode, String m return String.join(" | ", all); } - public static void assertWarnings(List warnings, List allowedWarnings, List allowedWarningsRegex) { - if (allowedWarningsRegex.isEmpty()) { - assertMap(warnings.stream().sorted().toList(), matchesList(allowedWarnings.stream().sorted().toList())); - } else { - for (String warning : warnings) { - assertTrue("Unexpected warning: " + warning, allowedWarningsRegex.stream().anyMatch(x -> x.matcher(warning).matches())); - } - } - } - /** * "tables" provided in the context for the LOOKUP command. If you * add to this, you must also add to {@code EsqlSpecTestCase#tables}; diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java index c05a7d51b3b2f..348ca4acd100e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/CsvTests.java @@ -496,7 +496,7 @@ private void assertWarnings(List warnings) { normalized.add(normW); } } - EsqlTestUtils.assertWarnings(normalized, testCase.expectedWarnings(), testCase.expectedWarningsRegex()); + testCase.assertWarnings(false).assertWarnings(normalized); } PlanRunner planRunner(BigArrays bigArrays, TestPhysicalOperationProviders physicalOperationProviders) { From 90600bf7a39f81a429e3d661bee90a82af8dbdde Mon Sep 17 00:00:00 2001 From: Quentin Pradet Date: Thu, 7 Nov 2024 06:40:19 +0400 Subject: [PATCH 06/11] Add missing header in put_data_lifecycle rest-api-spec (#116292) --- docs/changelog/116292.yaml | 5 +++++ .../rest-api-spec/api/indices.put_data_lifecycle.json | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/116292.yaml diff --git a/docs/changelog/116292.yaml b/docs/changelog/116292.yaml new file mode 100644 index 0000000000000..f741c67bea155 --- /dev/null +++ b/docs/changelog/116292.yaml @@ -0,0 +1,5 @@ +pr: 116292 +summary: Add missing header in `put_data_lifecycle` rest-api-spec +area: Data streams +type: bug +issues: [] diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/indices.put_data_lifecycle.json b/rest-api-spec/src/main/resources/rest-api-spec/api/indices.put_data_lifecycle.json index 08dc7128234b9..0a2f7b33498cf 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/indices.put_data_lifecycle.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/indices.put_data_lifecycle.json @@ -7,7 +7,8 @@ "stability":"stable", "visibility":"public", "headers":{ - "accept": [ "application/json"] + "accept": [ "application/json"], + "content_type": ["application/json"] }, "url": { "paths": [ From 4e990940c0f4441ca9044836b079485f4402bd05 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 7 Nov 2024 17:04:03 +1100 Subject: [PATCH 07/11] Mute org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT test {categorize.Categorize ASYNC} #116373 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 529f5d75ad0ec..6e4979653c8de 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -285,6 +285,9 @@ tests: - class: org.elasticsearch.xpack.shutdown.NodeShutdownIT method: testAllocationPreventedForRemoval issue: https://github.com/elastic/elasticsearch/issues/116363 +- class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT + method: test {categorize.Categorize ASYNC} + issue: https://github.com/elastic/elasticsearch/issues/116373 # Examples: # From 3bc094fef1d034bfb0d251fae1f66c8fb8659155 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine <58790826+elasticsearchmachine@users.noreply.github.com> Date: Thu, 7 Nov 2024 17:51:04 +1100 Subject: [PATCH 08/11] Mute org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshotsIntegTests testCreateAndRestoreSearchableSnapshot #116377 --- muted-tests.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index 6e4979653c8de..c996c8d25db56 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -288,6 +288,9 @@ tests: - class: org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT method: test {categorize.Categorize ASYNC} issue: https://github.com/elastic/elasticsearch/issues/116373 +- class: org.elasticsearch.xpack.searchablesnapshots.SearchableSnapshotsIntegTests + method: testCreateAndRestoreSearchableSnapshot + issue: https://github.com/elastic/elasticsearch/issues/116377 # Examples: # From 32b1fc21c7c20500481af0b97495996fd9f54e6c Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 7 Nov 2024 18:02:10 +1100 Subject: [PATCH 09/11] [Test] Fix S3ServiceTests.testRetryOn403RetryPolicy (#115993) AWS's default retry condition defines a list of retryable error status. They are retryable for the custom retryable_403 condition as well. This PR enhances the test to excercise this logic correctly. Resolves: #115986 --- .../repositories/s3/S3ServiceTests.java | 26 +++++++++++++++---- muted-tests.yml | 3 --- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3ServiceTests.java b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3ServiceTests.java index afe1bb1a03c76..8bf0c983ea74a 100644 --- a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3ServiceTests.java +++ b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3ServiceTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.repositories.s3; import com.amazonaws.AmazonWebServiceRequest; +import com.amazonaws.retry.PredefinedRetryPolicies; import com.amazonaws.services.s3.model.AmazonS3Exception; import org.elasticsearch.cluster.metadata.RepositoryMetadata; @@ -19,6 +20,7 @@ import java.io.IOException; +import static org.hamcrest.CoreMatchers.equalTo; import static org.mockito.Mockito.mock; public class S3ServiceTests extends ESTestCase { @@ -47,19 +49,33 @@ public void testRetryOn403RetryPolicy() { e.setStatusCode(403); e.setErrorCode("InvalidAccessKeyId"); - // Retry on 403 invalid access key id + // AWS default retry condition does not retry on 403 + assertFalse(PredefinedRetryPolicies.DEFAULT_RETRY_CONDITION.shouldRetry(mock(AmazonWebServiceRequest.class), e, between(0, 9))); + + // The retryable 403 condition retries on 403 invalid access key id assertTrue( S3Service.RETRYABLE_403_RETRY_POLICY.getRetryCondition().shouldRetry(mock(AmazonWebServiceRequest.class), e, between(0, 9)) ); - // Not retry if not 403 or not invalid access key id if (randomBoolean()) { + // Random for another error status that is not 403 e.setStatusCode(randomValueOtherThan(403, () -> between(0, 600))); + // Retryable 403 condition delegates to the AWS default retry condition. Its result must be consistent with the decision + // by the AWS default, e.g. some error status like 429 is retryable by default, the retryable 403 condition respects it. + boolean actual = S3Service.RETRYABLE_403_RETRY_POLICY.getRetryCondition() + .shouldRetry(mock(AmazonWebServiceRequest.class), e, between(0, 9)); + boolean expected = PredefinedRetryPolicies.DEFAULT_RETRY_CONDITION.shouldRetry( + mock(AmazonWebServiceRequest.class), + e, + between(0, 9) + ); + assertThat(actual, equalTo(expected)); } else { + // Not retry for 403 with error code that is not invalid access key id e.setErrorCode(randomAlphaOfLength(10)); + assertFalse( + S3Service.RETRYABLE_403_RETRY_POLICY.getRetryCondition().shouldRetry(mock(AmazonWebServiceRequest.class), e, between(0, 9)) + ); } - assertFalse( - S3Service.RETRYABLE_403_RETRY_POLICY.getRetryCondition().shouldRetry(mock(AmazonWebServiceRequest.class), e, between(0, 9)) - ); } } diff --git a/muted-tests.yml b/muted-tests.yml index c996c8d25db56..5194b05eac258 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -160,9 +160,6 @@ tests: - class: org.elasticsearch.xpack.test.rest.XPackRestIT method: test {p0=ml/inference_crud/Test delete given model referenced by pipeline} issue: https://github.com/elastic/elasticsearch/issues/115970 -- class: org.elasticsearch.repositories.s3.S3ServiceTests - method: testRetryOn403RetryPolicy - issue: https://github.com/elastic/elasticsearch/issues/115986 - class: org.elasticsearch.search.slice.SearchSliceIT method: testPointInTime issue: https://github.com/elastic/elasticsearch/issues/115988 From 30feced7624d433ea6d97e9280a8dc2b4853075a Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 7 Nov 2024 18:49:20 +1100 Subject: [PATCH 10/11] Include both index and shard ID in allocation messages (#115895) The shardId in AllocationCommand is the 0-based ID for the shard. It needs to be combined with the index name to make sense in both error and explanation messages. This PR ensures that is the case for both CancelAllocationCommand and MoveAllocationCommand. Other commands are already doing the same thing. --- .../command/CancelAllocationCommand.java | 20 +++++++++----- .../command/MoveAllocationCommand.java | 26 ++++++++++++++----- 2 files changed, 33 insertions(+), 13 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/command/CancelAllocationCommand.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/command/CancelAllocationCommand.java index 79daceaf11851..f946402d1fa09 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/command/CancelAllocationCommand.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/command/CancelAllocationCommand.java @@ -136,11 +136,13 @@ public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) allocation.decision( Decision.NO, "cancel_allocation_command", - "can't cancel " + shardId + ", failed to find it on node " + discoNode + "can't cancel [" + index + "][" + shardId + "], failed to find it on node " + discoNode ) ); } - throw new IllegalArgumentException("[cancel_allocation] can't cancel " + shardId + ", failed to find it on node " + discoNode); + throw new IllegalArgumentException( + "[cancel_allocation] can't cancel [" + index + "][" + shardId + "], failed to find it on node " + discoNode + ); } if (shardRouting.primary() && allowPrimary == false) { if ((shardRouting.initializing() && shardRouting.relocatingNodeId() != null) == false) { @@ -151,9 +153,11 @@ public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) allocation.decision( Decision.NO, "cancel_allocation_command", - "can't cancel " + "can't cancel [" + + index + + "][" + shardId - + " on node " + + "] on node " + discoNode + ", shard is primary and " + shardRouting.state().name().toLowerCase(Locale.ROOT) @@ -161,9 +165,11 @@ public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) ); } throw new IllegalArgumentException( - "[cancel_allocation] can't cancel " + "[cancel_allocation] can't cancel [" + + index + + "][" + shardId - + " on node " + + "] on node " + discoNode + ", shard is primary and " + shardRouting.state().name().toLowerCase(Locale.ROOT) @@ -178,7 +184,7 @@ public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) allocation.decision( Decision.YES, "cancel_allocation_command", - "shard " + shardId + " on node " + discoNode + " can be cancelled" + "shard [" + index + "][" + shardId + "] on node " + discoNode + " can be cancelled" ) ); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/command/MoveAllocationCommand.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/command/MoveAllocationCommand.java index 4295a6178168a..b937ebdc33091 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/command/MoveAllocationCommand.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/command/MoveAllocationCommand.java @@ -141,11 +141,21 @@ public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) if (explain) { return new RerouteExplanation( this, - allocation.decision(Decision.NO, "move_allocation_command", "shard " + shardId + " has not been started") + allocation.decision( + Decision.NO, + "move_allocation_command", + "shard [" + index + "][" + shardId + "] has not been started" + ) ); } throw new IllegalArgumentException( - "[move_allocation] can't move " + shardId + ", shard is not started (state = " + shardRouting.state() + "]" + "[move_allocation] can't move [" + + index + + "][" + + shardId + + "], shard is not started (state = " + + shardRouting.state() + + "]" ); } @@ -155,9 +165,11 @@ public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) return new RerouteExplanation(this, decision); } throw new IllegalArgumentException( - "[move_allocation] can't move " + "[move_allocation] can't move [" + + index + + "][" + shardId - + ", from " + + "], from " + fromDiscoNode + ", to " + toDiscoNode @@ -182,10 +194,12 @@ public RerouteExplanation execute(RoutingAllocation allocation, boolean explain) if (explain) { return new RerouteExplanation( this, - allocation.decision(Decision.NO, "move_allocation_command", "shard " + shardId + " not found") + allocation.decision(Decision.NO, "move_allocation_command", "shard [" + index + "][" + shardId + "] not found") ); } - throw new IllegalArgumentException("[move_allocation] can't move " + shardId + ", failed to find it on node " + fromDiscoNode); + throw new IllegalArgumentException( + "[move_allocation] can't move [" + index + "][" + shardId + "], failed to find it on node " + fromDiscoNode + ); } return new RerouteExplanation(this, decision); } From 81fd1de76b79426993f7cacd3f86f6a2f21dbcf1 Mon Sep 17 00:00:00 2001 From: Tim Grein Date: Thu, 7 Nov 2024 08:51:26 +0100 Subject: [PATCH 11/11] Add ES|QL bit_length function (#115792) --- docs/changelog/115792.yaml | 5 + .../functions/description/bit_length.asciidoc | 5 + .../functions/examples/bit_length.asciidoc | 13 ++ .../esql/functions/kibana/docs/bit_length.md | 12 ++ .../esql/functions/layout/bit_length.asciidoc | 15 ++ .../functions/parameters/bit_length.asciidoc | 6 + .../esql/functions/signature/bit_length.svg | 1 + .../esql/functions/string-functions.asciidoc | 2 + .../esql/functions/types/bit_length.asciidoc | 10 ++ x-pack/plugin/build.gradle | 4 +- .../src/main/resources/docs.csv-spec | 19 +++ .../src/main/resources/string.csv-spec | 50 +++++++ .../scalar/string/BitLengthEvaluator.java | 137 ++++++++++++++++++ .../xpack/esql/action/EsqlCapabilities.java | 6 + .../function/EsqlFunctionRegistry.java | 2 + .../function/scalar/EsqlScalarFunction.java | 2 + .../function/scalar/string/BitLength.java | 100 +++++++++++++ .../string/BitLengthSerializationTests.java | 19 +++ .../scalar/string/BitLengthTests.java | 61 ++++++++ .../rest-api-spec/test/esql/60_usage.yml | 8 +- 20 files changed, 472 insertions(+), 5 deletions(-) create mode 100644 docs/changelog/115792.yaml create mode 100644 docs/reference/esql/functions/description/bit_length.asciidoc create mode 100644 docs/reference/esql/functions/examples/bit_length.asciidoc create mode 100644 docs/reference/esql/functions/kibana/docs/bit_length.md create mode 100644 docs/reference/esql/functions/layout/bit_length.asciidoc create mode 100644 docs/reference/esql/functions/parameters/bit_length.asciidoc create mode 100644 docs/reference/esql/functions/signature/bit_length.svg create mode 100644 docs/reference/esql/functions/types/bit_length.asciidoc create mode 100644 x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLengthEvaluator.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLength.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLengthSerializationTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLengthTests.java diff --git a/docs/changelog/115792.yaml b/docs/changelog/115792.yaml new file mode 100644 index 0000000000000..2945a64e3043a --- /dev/null +++ b/docs/changelog/115792.yaml @@ -0,0 +1,5 @@ +pr: 115792 +summary: Add ES|QL `bit_length` function +area: ES|QL +type: enhancement +issues: [] diff --git a/docs/reference/esql/functions/description/bit_length.asciidoc b/docs/reference/esql/functions/description/bit_length.asciidoc new file mode 100644 index 0000000000000..1aad47488802d --- /dev/null +++ b/docs/reference/esql/functions/description/bit_length.asciidoc @@ -0,0 +1,5 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Description* + +Returns the bit length of a string. diff --git a/docs/reference/esql/functions/examples/bit_length.asciidoc b/docs/reference/esql/functions/examples/bit_length.asciidoc new file mode 100644 index 0000000000000..a99f6f664e79e --- /dev/null +++ b/docs/reference/esql/functions/examples/bit_length.asciidoc @@ -0,0 +1,13 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Example* + +[source.merge.styled,esql] +---- +include::{esql-specs}/docs.csv-spec[tag=bitLength] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/docs.csv-spec[tag=bitLength-result] +|=== + diff --git a/docs/reference/esql/functions/kibana/docs/bit_length.md b/docs/reference/esql/functions/kibana/docs/bit_length.md new file mode 100644 index 0000000000000..22280febd7876 --- /dev/null +++ b/docs/reference/esql/functions/kibana/docs/bit_length.md @@ -0,0 +1,12 @@ + + +### BIT_LENGTH +Returns the bit length of a string. + +``` +FROM employees +| KEEP first_name, last_name +| EVAL fn_bit_length = BIT_LENGTH(first_name) +``` diff --git a/docs/reference/esql/functions/layout/bit_length.asciidoc b/docs/reference/esql/functions/layout/bit_length.asciidoc new file mode 100644 index 0000000000000..00a7206f3ceda --- /dev/null +++ b/docs/reference/esql/functions/layout/bit_length.asciidoc @@ -0,0 +1,15 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +[discrete] +[[esql-bit_length]] +=== `BIT_LENGTH` + +*Syntax* + +[.text-center] +image::esql/functions/signature/bit_length.svg[Embedded,opts=inline] + +include::../parameters/bit_length.asciidoc[] +include::../description/bit_length.asciidoc[] +include::../types/bit_length.asciidoc[] +include::../examples/bit_length.asciidoc[] diff --git a/docs/reference/esql/functions/parameters/bit_length.asciidoc b/docs/reference/esql/functions/parameters/bit_length.asciidoc new file mode 100644 index 0000000000000..7bb8c080ce4a1 --- /dev/null +++ b/docs/reference/esql/functions/parameters/bit_length.asciidoc @@ -0,0 +1,6 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Parameters* + +`string`:: +String expression. If `null`, the function returns `null`. diff --git a/docs/reference/esql/functions/signature/bit_length.svg b/docs/reference/esql/functions/signature/bit_length.svg new file mode 100644 index 0000000000000..904dbbe25c9c2 --- /dev/null +++ b/docs/reference/esql/functions/signature/bit_length.svg @@ -0,0 +1 @@ +BIT_LENGTH(string) \ No newline at end of file diff --git a/docs/reference/esql/functions/string-functions.asciidoc b/docs/reference/esql/functions/string-functions.asciidoc index f5222330d579d..422860f0a7a1d 100644 --- a/docs/reference/esql/functions/string-functions.asciidoc +++ b/docs/reference/esql/functions/string-functions.asciidoc @@ -8,6 +8,7 @@ {esql} supports these string functions: // tag::string_list[] +* <> * <> * <> * <> @@ -30,6 +31,7 @@ * <> // end::string_list[] +include::layout/bit_length.asciidoc[] include::layout/concat.asciidoc[] include::layout/ends_with.asciidoc[] include::layout/from_base64.asciidoc[] diff --git a/docs/reference/esql/functions/types/bit_length.asciidoc b/docs/reference/esql/functions/types/bit_length.asciidoc new file mode 100644 index 0000000000000..db5a48c7c4390 --- /dev/null +++ b/docs/reference/esql/functions/types/bit_length.asciidoc @@ -0,0 +1,10 @@ +// This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it. + +*Supported types* + +[%header.monospaced.styled,format=dsv,separator=|] +|=== +string | result +keyword | integer +text | integer +|=== diff --git a/x-pack/plugin/build.gradle b/x-pack/plugin/build.gradle index cf6a8f51d1b81..8b920ac11cee7 100644 --- a/x-pack/plugin/build.gradle +++ b/x-pack/plugin/build.gradle @@ -82,8 +82,10 @@ tasks.named("precommit").configure { tasks.named("yamlRestCompatTestTransform").configure({ task -> task.skipTest("security/10_forbidden/Test bulk response with invalid credentials", "warning does not exist for compatibility") - task.skipTest("inference/inference_crud/Test get all", "Assertions on number of inference models break due to default configs") task.skipTest("esql/60_usage/Basic ESQL usage output (telemetry)", "The telemetry output changed. We dropped a column. That's safe.") + task.skipTest("inference/inference_crud/Test get all", "Assertions on number of inference models break due to default configs") + task.skipTest("esql/60_usage/Basic ESQL usage output (telemetry) snapshot version", "The number of functions is constantly increasing") + task.skipTest("esql/60_usage/Basic ESQL usage output (telemetry) non-snapshot version", "The number of functions is constantly increasing") task.skipTest("esql/80_text/reverse text", "The output type changed from TEXT to KEYWORD.") task.skipTest("esql/80_text/values function", "The output type changed from TEXT to KEYWORD.") }) diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec index a9c5a5214f159..14d811535aafd 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/docs.csv-spec @@ -656,3 +656,22 @@ FROM sample_data @timestamp:date | client_ip:ip | event_duration:long | message:keyword ; + +docsBitLength +required_capability: fn_bit_length +// tag::bitLength[] +FROM employees +| KEEP first_name, last_name +| EVAL fn_bit_length = BIT_LENGTH(first_name) +// end::bitLength[] +| SORT first_name +| LIMIT 3 +; + +// tag::bitLength-result[] +first_name:keyword | last_name:keyword | fn_bit_length:integer +Alejandro |McAlpine |72 +Amabile |Gomatam |56 +Anneke |Preusig |48 +// end::bitLength-result[] +; diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec index 305b8f3d8011e..de5981df999c7 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/string.csv-spec @@ -38,6 +38,56 @@ emp_no:integer | l:integer 10003 | 5 ; +bitLength +required_capability: fn_bit_length +row a = "hello", b = "" | eval y = bit_length(a) + bit_length(b); + +a:keyword | b:keyword | y:integer +hello | | 40 +; + +bitLengthWithNonAsciiChars +required_capability: fn_bit_length +row a = "¡", b = "❗️" | eval y = bit_length(a) | eval z = bit_length(b); + +a:keyword | b:keyword | y:integer | z:integer +¡ | ❗️ | 16 | 48 +; + +foldBitLength +required_capability: fn_bit_length +row a = 1 | eval b = bit_length("hello"); + +a:integer | b:integer +1 | 40 +; + +bitLengthAndSourceQuoting +required_capability: fn_bit_length +from "employees" | sort emp_no | limit 3 | eval l = bit_length(first_name) | keep emp_no, l; + +emp_no:integer | l:integer +10001 | 48 +10002 | 56 +10003 | 40 +; + +bitLengthInsideOtherFunction +required_capability: fn_bit_length +row a = "abc", b = "de" | eval g = greatest(bit_length(a), bit_length(b), bit_length("fghi")); + +a:keyword | b:keyword | g:integer +abc | de | 32 +; + +bitLengthNull +required_capability: fn_bit_length +row a = "abc" | eval l = bit_length(null); + +a:string | l:integer +abc | null +; + startsWithConstant from employees | sort emp_no | limit 10 | eval f_S = starts_with(first_name, "S") | keep emp_no, first_name, f_S; diff --git a/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLengthEvaluator.java b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLengthEvaluator.java new file mode 100644 index 0000000000000..6564a2f3ef167 --- /dev/null +++ b/x-pack/plugin/esql/src/main/generated/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLengthEvaluator.java @@ -0,0 +1,137 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License +// 2.0; you may not use this file except in compliance with the Elastic License +// 2.0. +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import java.lang.ArithmeticException; +import java.lang.IllegalArgumentException; +import java.lang.Override; +import java.lang.String; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; +import org.elasticsearch.compute.data.IntBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.compute.operator.Warnings; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.xpack.esql.core.tree.Source; + +/** + * {@link EvalOperator.ExpressionEvaluator} implementation for {@link BitLength}. + * This class is generated. Do not edit it. + */ +public final class BitLengthEvaluator implements EvalOperator.ExpressionEvaluator { + private final Source source; + + private final EvalOperator.ExpressionEvaluator val; + + private final DriverContext driverContext; + + private Warnings warnings; + + public BitLengthEvaluator(Source source, EvalOperator.ExpressionEvaluator val, + DriverContext driverContext) { + this.source = source; + this.val = val; + this.driverContext = driverContext; + } + + @Override + public Block eval(Page page) { + try (BytesRefBlock valBlock = (BytesRefBlock) val.eval(page)) { + BytesRefVector valVector = valBlock.asVector(); + if (valVector == null) { + return eval(page.getPositionCount(), valBlock); + } + return eval(page.getPositionCount(), valVector); + } + } + + public IntBlock eval(int positionCount, BytesRefBlock valBlock) { + try(IntBlock.Builder result = driverContext.blockFactory().newIntBlockBuilder(positionCount)) { + BytesRef valScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + if (valBlock.isNull(p)) { + result.appendNull(); + continue position; + } + if (valBlock.getValueCount(p) != 1) { + if (valBlock.getValueCount(p) > 1) { + warnings().registerException(new IllegalArgumentException("single-value function encountered multi-value")); + } + result.appendNull(); + continue position; + } + try { + result.appendInt(BitLength.process(valBlock.getBytesRef(valBlock.getFirstValueIndex(p), valScratch))); + } catch (ArithmeticException e) { + warnings().registerException(e); + result.appendNull(); + } + } + return result.build(); + } + } + + public IntBlock eval(int positionCount, BytesRefVector valVector) { + try(IntBlock.Builder result = driverContext.blockFactory().newIntBlockBuilder(positionCount)) { + BytesRef valScratch = new BytesRef(); + position: for (int p = 0; p < positionCount; p++) { + try { + result.appendInt(BitLength.process(valVector.getBytesRef(p, valScratch))); + } catch (ArithmeticException e) { + warnings().registerException(e); + result.appendNull(); + } + } + return result.build(); + } + } + + @Override + public String toString() { + return "BitLengthEvaluator[" + "val=" + val + "]"; + } + + @Override + public void close() { + Releasables.closeExpectNoException(val); + } + + private Warnings warnings() { + if (warnings == null) { + this.warnings = Warnings.createWarnings( + driverContext.warningsMode(), + source.source().getLineNumber(), + source.source().getColumnNumber(), + source.text() + ); + } + return warnings; + } + + static class Factory implements EvalOperator.ExpressionEvaluator.Factory { + private final Source source; + + private final EvalOperator.ExpressionEvaluator.Factory val; + + public Factory(Source source, EvalOperator.ExpressionEvaluator.Factory val) { + this.source = source; + this.val = val; + } + + @Override + public BitLengthEvaluator get(DriverContext context) { + return new BitLengthEvaluator(source, val.get(context), context); + } + + @Override + public String toString() { + return "BitLengthEvaluator[" + "val=" + val + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 837acfc5083f7..38a8452095d68 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -27,6 +27,12 @@ */ public class EsqlCapabilities { public enum Cap { + + /** + * Support for function {@code BIT_LENGTH}. Done in #115792 + */ + FN_BIT_LENGTH, + /** * Support for function {@code REVERSE}. */ diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 66151275fc2e8..d7a23a6589d4f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -117,6 +117,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StDistance; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StX; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.StY; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith; import org.elasticsearch.xpack.esql.expression.function.scalar.string.LTrim; @@ -305,6 +306,7 @@ private FunctionDefinition[][] functions() { def(Tau.class, Tau::new, "tau") }, // string new FunctionDefinition[] { + def(BitLength.class, BitLength::new, "bit_length"), def(Concat.class, Concat::new, "concat"), def(EndsWith.class, EndsWith::new, "ends_with"), def(LTrim.class, LTrim::new, "ltrim"), diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/EsqlScalarFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/EsqlScalarFunction.java index e4e1fbb6e5aac..65985f234ac92 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/EsqlScalarFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/EsqlScalarFunction.java @@ -38,6 +38,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.math.Tau; import org.elasticsearch.xpack.esql.expression.function.scalar.nulls.Coalesce; import org.elasticsearch.xpack.esql.expression.function.scalar.spatial.BinarySpatialFunction; +import org.elasticsearch.xpack.esql.expression.function.scalar.string.BitLength; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.EndsWith; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Left; @@ -74,6 +75,7 @@ public static List getNamedWriteables() { List entries = new ArrayList<>(); entries.add(And.ENTRY); entries.add(Atan2.ENTRY); + entries.add(BitLength.ENTRY); entries.add(Bucket.ENTRY); entries.add(Case.ENTRY); entries.add(Categorize.ENTRY); diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLength.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLength.java new file mode 100644 index 0000000000000..5deb6fa7feba6 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLength.java @@ -0,0 +1,100 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.compute.ann.Evaluator; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; +import org.elasticsearch.xpack.esql.expression.function.scalar.UnaryScalarFunction; +import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; + +import java.io.IOException; +import java.util.List; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.DEFAULT; +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isString; + +public class BitLength extends UnaryScalarFunction { + + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Expression.class, + "BitLength", + BitLength::new + ); + + @FunctionInfo( + returnType = "integer", + description = "Returns the bit length of a string.", + examples = @Example(file = "docs", tag = "bitLength") + ) + public BitLength( + Source source, + @Param( + name = "string", + type = { "keyword", "text" }, + description = "String expression. If `null`, the function returns `null`." + ) Expression field + ) { + super(source, field); + } + + private BitLength(StreamInput in) throws IOException { + this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + source().writeTo(out); + out.writeNamedWriteable(field()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + @Override + public DataType dataType() { + return DataType.INTEGER; + } + + @Override + protected TypeResolution resolveType() { + return childrenResolved() == false ? new TypeResolution("Unresolved children") : isString(field(), sourceText(), DEFAULT); + } + + @Evaluator(warnExceptions = { ArithmeticException.class }) + static int process(BytesRef val) { + return Math.multiplyExact(val.length, Byte.SIZE); + } + + @Override + public Expression replaceChildren(List newChildren) { + return new BitLength(source(), newChildren.get(0)); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, BitLength::new, field()); + } + + @Override + public EvalOperator.ExpressionEvaluator.Factory toEvaluator(ToEvaluator toEvaluator) { + return new BitLengthEvaluator.Factory(source(), toEvaluator.apply(field())); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLengthSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLengthSerializationTests.java new file mode 100644 index 0000000000000..2564ac0bdb1cf --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLengthSerializationTests.java @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.AbstractUnaryScalarSerializationTests; + +public class BitLengthSerializationTests extends AbstractUnaryScalarSerializationTests { + @Override + protected BitLength create(Source source, Expression child) { + return new BitLength(source, child); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLengthTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLengthTests.java new file mode 100644 index 0000000000000..bce4328a08abf --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/scalar/string/BitLengthTests.java @@ -0,0 +1,61 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.scalar.string; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.lucene.BytesRefs; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; + +import static org.hamcrest.Matchers.equalTo; + +public class BitLengthTests extends AbstractScalarFunctionTestCase { + + public BitLengthTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + List suppliers = new ArrayList<>(); + + for (DataType stringType : DataType.stringTypes()) { + for (var supplier : TestCaseSupplier.stringCases(stringType)) { + suppliers.add(makeSupplier(supplier)); + } + } + + return parameterSuppliersFromTypedDataWithDefaultChecks(true, suppliers, (v, p) -> "string"); + } + + @Override + protected Expression build(Source source, List args) { + return new BitLength(source, args.get(0)); + } + + private static TestCaseSupplier makeSupplier(TestCaseSupplier.TypedDataSupplier fieldSupplier) { + return new TestCaseSupplier(fieldSupplier.name(), List.of(fieldSupplier.type()), () -> { + var fieldTypedData = fieldSupplier.get(); + String evaluatorToString = "BitLengthEvaluator[val=Attribute[channel=0]]"; + BytesRef value = BytesRefs.toBytesRef(fieldTypedData.data()); + var expectedValue = value.length * Byte.SIZE; + + return new TestCaseSupplier.TestCase(List.of(fieldTypedData), evaluatorToString, DataType.INTEGER, equalTo(expectedValue)); + }); + } +} diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml index b51bbdc4d2f87..df88de8da4e01 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/60_usage.yml @@ -30,7 +30,7 @@ setup: - method: POST path: /_query parameters: [] - capabilities: [ snapshot_test_for_telemetry ] + capabilities: [ snapshot_test_for_telemetry, fn_bit_length ] reason: "Test that should only be executed on snapshot versions" - do: {xpack.usage: {}} @@ -91,7 +91,7 @@ setup: - match: {esql.functions.cos: $functions_cos} - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} - - length: {esql.functions: 117} # check the "sister" test below for a likely update to the same esql.functions length check + - length: {esql.functions: 118} # check the "sister" test below for a likely update to the same esql.functions length check --- "Basic ESQL usage output (telemetry) non-snapshot version": @@ -101,7 +101,7 @@ setup: - method: POST path: /_query parameters: [] - capabilities: [ non_snapshot_test_for_telemetry ] + capabilities: [ non_snapshot_test_for_telemetry, fn_bit_length ] reason: "Test that should only be executed on release versions" - do: {xpack.usage: {}} @@ -162,4 +162,4 @@ setup: - match: {esql.functions.cos: $functions_cos} - gt: {esql.functions.to_long: $functions_to_long} - match: {esql.functions.coalesce: $functions_coalesce} - - length: {esql.functions: 115} # check the "sister" test above for a likely update to the same esql.functions length check + - length: {esql.functions: 118} # check the "sister" test above for a likely update to the same esql.functions length check