ESQL: Validate unique plan attribute names #110488

Merged
Changes from 37 commits
Commits
43 commits
134a3e8
Unique output attribute names after optimization
alex-spies Jul 4, 2024
9d9c70f
Enforce unique row attribute names in verifier
alex-spies Jul 4, 2024
0d4e1df
Update docs/changelog/110488.yaml
alex-spies Jul 4, 2024
6f36c29
Add tests for grok, dissect, enrich
alex-spies Jul 5, 2024
cd48514
Add tests for keep
alex-spies Jul 5, 2024
3a5dab7
Make row consistent with other plans
alex-spies Jul 5, 2024
f71ef42
Update docs/changelog/110488.yaml
alex-spies Jul 5, 2024
42be4eb
Add test for drop, rename and stats
alex-spies Jul 5, 2024
2a0d630
Merge remote-tracking branch 'upstream/main' into validate-unique-pla…
alex-spies Jul 5, 2024
11da00c
Add test dataset with deeper field hierarchy
alex-spies Jul 9, 2024
0d6758e
Add hierarchical shadowing test for eval
alex-spies Jul 9, 2024
0e13eaf
Add hierarchical tests for drop, dissect
alex-spies Jul 9, 2024
878eb99
Add hierarchical tests for enrich
alex-spies Jul 9, 2024
e8609f2
Add hierarchical tests for grok, keep
alex-spies Jul 9, 2024
5b4be72
Add hierarchical tests for rename, row
alex-spies Jul 9, 2024
2f1778a
Add more extreme case for stats
alex-spies Jul 9, 2024
73a9736
Add new capability for this fix
alex-spies Jul 9, 2024
e3531ff
Merge remote-tracking branch 'upstream/main' into validate-unique-pla…
alex-spies Jul 9, 2024
2cb23f5
Fix EsRelation.equals, mutation in ResolveUnionTypes
alex-spies Jul 9, 2024
222698c
Merge remote-tracking branch 'upstream/main' into validate-unique-pla…
alex-spies Jul 10, 2024
1a648e4
Make union types use unique attribute names
alex-spies Jul 11, 2024
658bebc
Cleanup leftover
alex-spies Jul 11, 2024
e2760a5
Merge remote-tracking branch 'upstream/main' into validate-unique-pla…
alex-spies Jul 11, 2024
6ac0fa9
Revert "Cleanup leftover"
alex-spies Jul 15, 2024
0f3c274
Revert "Make union types use unique attribute names"
alex-spies Jul 15, 2024
091c099
Revert "Fix EsRelation.equals, mutation in ResolveUnionTypes"
alex-spies Jul 15, 2024
f478d59
More ENRICH tests with internal shadowing
alex-spies Jul 15, 2024
6f00534
More consistent test names
alex-spies Jul 15, 2024
535c954
More KEEP tests
alex-spies Jul 15, 2024
fb85e16
Update docs
alex-spies Jul 15, 2024
6f98cde
Add more tests
alex-spies Jul 15, 2024
fdcc40e
Improve doc wording
alex-spies Jul 15, 2024
3779900
Improve GROK docs
alex-spies Jul 15, 2024
07e9584
Merge remote-tracking branch 'upstream/main' into validate-unique-pla…
alex-spies Jul 15, 2024
32f76a0
Merge remote-tracking branch 'upstream/main' into validate-unique-pla…
alex-spies Jul 16, 2024
94737ff
Update RENAME docs and tests
alex-spies Jul 16, 2024
ea9b9a9
Avoid duplicate field attribs from union type res
alex-spies Jul 16, 2024
f5d9568
Fix leftovers
alex-spies Jul 16, 2024
a387165
Make tests deterministic
alex-spies Jul 16, 2024
233d68d
Fix rename shadowing docs
alex-spies Jul 16, 2024
d0723b0
Apply Liam's doc remarks
alex-spies Jul 16, 2024
fb17126
Don't describe KEEP precedence twice
alex-spies Jul 17, 2024
bc354f4
Merge remote-tracking branch 'upstream/main' into validate-unique-pla…
alex-spies Jul 17, 2024
6 changes: 6 additions & 0 deletions docs/changelog/110488.yaml
@@ -0,0 +1,6 @@
pr: 110488
summary: "ESQL: Validate unique plan attribute names"
area: ES|QL
type: bug
issues:
- 110541
2 changes: 2 additions & 0 deletions docs/reference/esql/processing-commands/dissect.asciidoc
@@ -20,6 +20,8 @@ multiple values, `DISSECT` will process each value.

`pattern`::
A <<esql-dissect-patterns,dissect pattern>>.
In case a field name coincides with an existing column, the existing column is discarded.
Contributor suggested change: If a field name conflicts with an existing column, the existing column is discarded.
If a field name is used more than once, only the rightmost duplicate creates a column.

`<separator>`::
A string used as the separator between appended values, when using the <<esql-append-modifier,append modifier>>.
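A minimal sketch of the rightmost-duplicate rule above, modeled on the shadowingInternal dissect csv-spec test added further down (the ROW value is illustrative only):

ROW name = "Georgi1 Facello"
// only the rightmost %{foo} in the pattern creates the foo column
| DISSECT name "%{foo} %{foo}"
;

name:keyword    | foo:keyword
Georgi1 Facello | Facello
;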
7 changes: 6 additions & 1 deletion docs/reference/esql/processing-commands/enrich.asciidoc
@@ -31,11 +31,16 @@ name as the `match_field` defined in the <<esql-enrich-policy,enrich policy>>.
The enrich fields from the enrich index that are added to the result as new
columns. If a column with the same name as the enrich field already exists, the
existing column will be replaced by the new column. If not specified, each of
the enrich fields defined in the policy is added
the enrich fields defined in the policy is added.
If a column has the same name as the enrich field, it will be discarded unless
that field is given a new name.
Contributor suggested change: A column with the same name as the enrich field will be discarded unless the enrich field is renamed.


`new_nameX`::
Enables you to change the name of the column that's added for each of the enrich
fields. Defaults to the enrich field name.
If a column has the same name as the new name, it will be discarded.
If a name (new or original) occurs more than once, only the rightmost duplicate
creates a new column.

*Description*

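The rightmost-duplicate rule for `WITH` clauses is exercised by the shadowingInternal enrich csv-spec test added further down, repeated here as a sketch (it assumes the city_names enrich policy from the test fixtures):

ROW city = "Zürich"
// both WITH entries target x; only the rightmost one (region) creates the column
| ENRICH city_names ON city WITH x = airport, x = region
;

city:keyword | x:text
Zürich       | Bezirk Zürich
;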
4 changes: 3 additions & 1 deletion docs/reference/esql/processing-commands/eval.asciidoc
@@ -16,10 +16,12 @@ EVAL [column1 =] value1[, ..., [columnN =] valueN]

`columnX`::
The column name.
In case a column name coincides with an existing column, the existing column is discarded.
@leemthompo (Contributor, Jul 16, 2024) suggested change: If a column exists with the same name, the existing column is discarded.
Author reply: I'll go for "If a column with the same name already exists, the existing column is dropped." if that works for you.

If a column name is used more than once, only the rightmost duplicate creates a column.

`valueX`::
The value for the column. Can be a literal, an expression, or a
<<esql-functions,function>>.
<<esql-functions,function>>. Can use columns defined left of this one.

*Description*

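A minimal sketch of both rules above (shadowing an existing column and using columns defined to the left), matching the shadowingInternal eval csv-spec test added further down:

ROW x = 10000
// the second expression sees the freshly computed x, and its result shadows it again
| EVAL x = x + 1, x = x - 2
;

x:integer
9999
;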
15 changes: 15 additions & 0 deletions docs/reference/esql/processing-commands/grok.asciidoc
@@ -20,6 +20,9 @@ multiple values, `GROK` will process each value.

`pattern`::
A grok pattern.
In case a field name coincides with an existing column, the existing column is discarded.
Contributor suggested change: If a field name conflicts with an existing column, the existing column is discarded.

If a field name is used more than once, a multi-valued column will be created with one value
per each occurrence of the field name.

*Description*

@@ -67,4 +70,16 @@ include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime]
|===
include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime-result]
|===

In case a field name is used more than once, `GROK` creates a multi-valued
Contributor suggested change: start the sentence with "If a field name is used more than once" instead of "In case a field name is used more than once".

column:

[source.merge.styled,esql]
----
include::{esql-specs}/docs.csv-spec[tag=grokWithDuplicateFieldNames]
----
[%header.monospaced.styled,format=dsv,separator=|]
|===
include::{esql-specs}/docs.csv-spec[tag=grokWithDuplicateFieldNames-result]
|===
// end::examples[]
6 changes: 5 additions & 1 deletion docs/reference/esql/processing-commands/keep.asciidoc
@@ -16,6 +16,10 @@ KEEP columns

`columns`::
A comma-separated list of columns to keep. Supports wildcards.
In case a column name without wildcards occurs multiple times, the column is placed at the
position of the rightmost duplicate. The same is true if a column name matches multiple
wildcards. If a column name matches both a wildcard and also a column name without wildcards,
the position of the latter is used.
Contributor suggested change: start the sentence with "If a column name without wildcards occurs multiple times" instead of "In case a column name without wildcards occurs multiple times".


*Description*

@@ -29,7 +33,7 @@ Fields are added in the order they appear. If one field matches multiple express
2. Partial wildcard expressions (for example: `fieldNam*`)
3. Wildcard only (`*`)

If a field matches two expressions with the same precedence, the right-most expression wins.
If a field matches two expressions with the same precedence, the rightmost expression wins.

Refer to the examples for illustrations of these precedence rules.

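A sketch of the rightmost-duplicate positioning rule, assuming the standard employees test dataset (where emp_no 10001 is Georgi Facello). If the rule above holds, repeating first_name places it at the position of its rightmost occurrence, i.e. after last_name:

FROM employees
| SORT emp_no
// first_name occurs twice; it takes the position of the rightmost duplicate
| KEEP first_name, last_name, first_name
| LIMIT 1
;

last_name:keyword | first_name:keyword
Facello           | Georgi
;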
2 changes: 2 additions & 0 deletions docs/reference/esql/processing-commands/lookup.asciidoc
@@ -18,6 +18,8 @@ LOOKUP table ON match_field1[, match_field2, ...]

`table`::
The name of the `table` provided in the request to match.
If it has column names that conflict with existing columns, the table's columns replace the
Contributor suggested change: start the sentence with "If the table's column names conflict with existing columns" instead of "If it has column names that conflict with existing columns".

existing ones.

`match_field`::
The fields in the input to match against the table.
5 changes: 4 additions & 1 deletion docs/reference/esql/processing-commands/rename.asciidoc
@@ -17,7 +17,10 @@ RENAME old_name1 AS new_name1[, ..., old_nameN AS new_nameN]
The name of a column you want to rename.

`new_nameX`::
The new name of the column.
The new name of the column. If it conflicts with an existing column name,
the existing column is replaced by the renamed column. If multiple columns
are renamed to the same name, all but the rightmost column with the same new
name are dropped.

*Description*

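A sketch of the "all but the rightmost are dropped" rule, assuming the standard employees test dataset (emp_no 10001 is Georgi Facello):

FROM employees
| SORT emp_no
| KEEP first_name, last_name
// both columns are renamed to name; only the rightmost rename survives
| RENAME first_name AS name, last_name AS name
| LIMIT 1
;

name:keyword
Facello
;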
3 changes: 3 additions & 0 deletions docs/reference/esql/processing-commands/stats.asciidoc
@@ -18,12 +18,15 @@ STATS [column1 =] expression1[, ..., [columnN =] expressionN]
`columnX`::
The name by which the aggregated value is returned. If omitted, the name is
equal to the corresponding expression (`expressionX`).
If multiple columns have the same name, all but the rightmost column with this
name will be ignored.

`expressionX`::
An expression that computes an aggregated value.

`grouping_expressionX`::
An expression that outputs the values to group by.
If its name coincides with one of the computed columns, that column will be ignored.

NOTE: Individual `null` values are skipped when computing aggregations.

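A sketch of the duplicate-column-name rule for STATS, assuming the standard employees test dataset (where MAX(languages) is 5, as shown by the drop csv-spec tests further down):

FROM employees
// both aggregates are named x; only the rightmost one creates the column
| STATS x = COUNT(*), x = MAX(languages)
;

x:integer
5
;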
1 change: 1 addition & 0 deletions docs/reference/esql/source-commands/row.asciidoc
@@ -16,6 +16,7 @@ ROW column1 = value1[, ..., columnN = valueN]

`columnX`::
The column name.
In case of duplicate column names, only the rightmost duplicate creates a column.

`valueX`::
The value for the column. Can be a literal, an expression, or a
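A minimal sketch of the rightmost-duplicate rule for ROW:

// a is defined twice; only the rightmost duplicate creates the column
ROW a = 1, a = 2
;

a:integer
2
;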
@@ -96,8 +96,8 @@ public class CsvTestsDataLoader {
"cartesian_multipolygons.csv"
);
private static final TestsDataset DISTANCES = new TestsDataset("distances", "mapping-distances.json", "distances.csv");

private static final TestsDataset K8S = new TestsDataset("k8s", "k8s-mappings.json", "k8s.csv", "k8s-settings.json", true);
private static final TestsDataset ADDRESSES = new TestsDataset("addresses", "mapping-addresses.json", "addresses.csv", null, true);

public static final Map<String, TestsDataset> CSV_DATASET_MAP = Map.ofEntries(
Map.entry(EMPLOYEES.indexName, EMPLOYEES),
@@ -121,7 +121,8 @@ public class CsvTestsDataLoader {
Map.entry(AIRPORT_CITY_BOUNDARIES.indexName, AIRPORT_CITY_BOUNDARIES),
Map.entry(CARTESIAN_MULTIPOLYGONS.indexName, CARTESIAN_MULTIPOLYGONS),
Map.entry(K8S.indexName, K8S),
Map.entry(DISTANCES.indexName, DISTANCES)
Map.entry(DISTANCES.indexName, DISTANCES),
Map.entry(ADDRESSES.indexName, ADDRESSES)
);

private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
@@ -0,0 +1,4 @@
street:keyword,number:keyword,zip_code:keyword,city.name:keyword,city.country.name:keyword,city.country.continent.name:keyword,city.country.continent.planet.name:keyword,city.country.continent.planet.galaxy:keyword
Keizersgracht,281,1016 ED,Amsterdam,Netherlands,Europe,Earth,Milky Way
Kearny St,88,CA 94108,San Francisco,United States of America,North America,Earth,Milky Way
Marunouchi,2-7-2,100-7014,Tokyo,Japan,Asia,Earth,Milky Way
@@ -26,6 +26,19 @@ first_name:keyword | left:keyword | full_name:keyword | right:keyword | last_nam
Georgi | left | Georgi Facello | right | Facello
;

shadowingSubfields
FROM addresses
| KEEP city.country.continent.planet.name, city.country.name, city.name
| DISSECT city.name "%{city.country.continent.planet.name} %{?}"
| SORT city.name
;

city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword
Netherlands | Amsterdam | null
United States of America | San Francisco | San
Japan | Tokyo | null
;

shadowingSelf
FROM employees
| KEEP first_name, last_name
@@ -50,6 +63,18 @@ last_name:keyword | left:keyword | foo:keyword | middle:keyword | ri
Facello | left | Georgi1 Georgi2 Facello | middle | right | Georgi1 | Georgi2 | Facello
;

shadowingInternal
FROM employees
| KEEP first_name, last_name
| WHERE last_name == "Facello"
| EVAL name = concat(first_name, "1 ", last_name)
| DISSECT name "%{foo} %{foo}"
;

first_name:keyword | last_name:keyword | name:keyword | foo:keyword
Georgi | Facello | Georgi1 Facello | Facello
;


complexPattern
ROW a = "1953-01-23T12:15:00Z - some text - 127.0.0.1;"
@@ -436,6 +436,23 @@ ROW a = "1.2.3.4 [2023-01-23T12:15:00.000Z] Connected"
// end::grokWithEscape-result[]
;

grokWithDuplicateFieldNames
// tag::grokWithDuplicateFieldNames[]
FROM addresses
| KEEP city.name, zip_code
| GROK zip_code "%{WORD:zip_parts} %{WORD:zip_parts}"
// end::grokWithDuplicateFieldNames[]
| SORT city.name
;

// tag::grokWithDuplicateFieldNames-result[]
city.name:keyword | zip_code:keyword | zip_parts:keyword
Amsterdam | 1016 ED | ["1016", "ED"]
San Francisco | CA 94108 | ["CA", "94108"]
Tokyo | 100-7014 | null
// end::grokWithDuplicateFieldNames-result[]
;

basicDissect
// tag::basicDissect[]
ROW a = "2023-01-23T12:15:00.000Z - some text - 127.0.0.1"
@@ -122,3 +122,51 @@ FROM employees | STATS COUNT(*), MIN(salary * 10), MAX(languages)| DROP `COUNT(
MIN(salary * 10):i | MAX(languages):i
253240 | 5
;

// Not really shadowing, but let's keep the name consistent with the other command's tests
shadowingInternal
FROM employees
| KEEP emp_no, first_name, last_name
| DROP last_name, last_name
| LIMIT 2
;

emp_no:integer | first_name:keyword
10001 | Georgi
10002 | Bezalel
;

shadowingInternalWildcard
FROM employees
| KEEP emp_no, first_name, last_name
| DROP last*name, last*name, last*, last_name
| LIMIT 2
;

emp_no:integer | first_name:keyword
10001 | Georgi
10002 | Bezalel
;

subfields
FROM addresses
| DROP city.country.continent.planet.name, city.country.continent.name, city.country.name, number, street, zip_code, city.country.continent.planet.name
| SORT city.name
;

city.country.continent.planet.galaxy:keyword | city.name:keyword
Milky Way | Amsterdam
Milky Way | San Francisco
Milky Way | Tokyo
;

subfieldsWildcard
FROM addresses
| DROP *.name, number, street, zip_code, *ame
;

city.country.continent.planet.galaxy:keyword
Milky Way
Milky Way
Milky Way
;
@@ -69,6 +69,34 @@ ROW left = "left", foo = "foo", client_ip = "172.21.0.5", env = "env", right = "
left:keyword | client_ip:keyword | env:keyword | right:keyword | foo:keyword
;

shadowingSubfields
required_capability: enrich_load
FROM addresses
| KEEP city.country.continent.planet.name, city.country.name, city.name
| EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco")
| ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport
| SORT city.name
;

city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:text
Netherlands | Amsterdam | null
United States of America | South San Francisco | San Francisco Int'l
Japan | Tokyo | null
;

shadowingSubfieldsLimit0
required_capability: enrich_load
FROM addresses
| KEEP city.country.continent.planet.name, city.country.name, city.name
| EVAL city.name = REPLACE(city.name, "San Francisco", "South San Francisco")
| ENRICH city_names ON city.name WITH city.country.continent.planet.name = airport
| SORT city.name
| LIMIT 0
;

city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:text
;

shadowingSelf
required_capability: enrich_load
ROW left = "left", client_ip = "172.21.0.5", env = "env", right = "right"
@@ -107,6 +135,46 @@ ROW left = "left", airport = "Zurich Airport ZRH", city = "Zürich", middle = "m
left:keyword | city:keyword | middle:keyword | right:keyword | airport:text | region:text | city_boundary:geo_shape
;

shadowingInternal
required_capability: enrich_load
ROW city = "Zürich"
| ENRICH city_names ON city WITH x = airport, x = region
;

city:keyword | x:text
Zürich | Bezirk Zürich
;

shadowingInternalImplicit
required_capability: enrich_load
ROW city = "Zürich"
| ENRICH city_names ON city WITH airport = region
;

city:keyword | airport:text
Zürich | Bezirk Zürich
;

shadowingInternalImplicit2
required_capability: enrich_load
ROW city = "Zürich"
| ENRICH city_names ON city WITH airport, airport = region
;

city:keyword | airport:text
Zürich | Bezirk Zürich
;

shadowingInternalImplicit3
required_capability: enrich_load
ROW city = "Zürich"
| ENRICH city_names ON city WITH airport = region, airport
;

city:keyword | airport:text
Zürich | Zurich Int'l
;

simple
required_capability: enrich_load

@@ -15,6 +15,19 @@ left:keyword | right:keyword | x:integer
left | right | 1
;

shadowingSubfields
FROM addresses
| KEEP city.country.continent.planet.name, city.country.name, city.name
| EVAL city.country.continent.planet.name = to_upper(city.country.continent.planet.name)
| SORT city.name
;

city.country.name:keyword | city.name:keyword | city.country.continent.planet.name:keyword
Netherlands | Amsterdam | EARTH
United States of America | San Francisco | EARTH
Japan | Tokyo | EARTH
;

shadowingSelf
ROW left = "left", x = 10000 , right = "right"
| EVAL x = x + 1
@@ -33,6 +46,16 @@ left:keyword | middle:keyword | right:keyword | x:integer | y:integer
left | middle | right | 9 | 10
;

shadowingInternal
ROW x = 10000
| EVAL x = x + 1, x = x - 2
;

x:integer
9999
;


withMath
row a = 1 | eval b = 2 + 3;
