From b0fce466a0c10bf89d6d5c24363f139cd7c88025 Mon Sep 17 00:00:00 2001 From: Alexander Spies Date: Mon, 16 Dec 2024 17:03:23 +0100 Subject: [PATCH] ESQL: tests for LOOKUP JOIN with non-unique join keys (#118471) (#118661) Add a csv dataset and tests for `LOOKUP JOIN` where the join keys are not unique. In particular, add tests that include MVs and nulls to see how `LOOKUP JOIN` treats these. --- .../xpack/esql/CsvTestsDataLoader.java | 3 + .../resources/languages_non_unique_key.csv | 10 +++ .../src/main/resources/lookup-join.csv-spec | 78 ++++++++++++++++++- 3 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_non_unique_key.csv diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java index 1e8013e81b301..c66d6839fa7d2 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/CsvTestsDataLoader.java @@ -62,6 +62,8 @@ public class CsvTestsDataLoader { private static final TestsDataset LANGUAGES = new TestsDataset("languages"); private static final TestsDataset LANGUAGES_LOOKUP = LANGUAGES.withIndex("languages_lookup") .withSetting("languages_lookup-settings.json"); + private static final TestsDataset LANGUAGES_LOOKUP_NON_UNIQUE_KEY = LANGUAGES_LOOKUP.withIndex("languages_lookup_non_unique_key") + .withData("languages_non_unique_key.csv"); private static final TestsDataset ALERTS = new TestsDataset("alerts"); private static final TestsDataset UL_LOGS = new TestsDataset("ul_logs"); private static final TestsDataset SAMPLE_DATA = new TestsDataset("sample_data"); @@ -113,6 +115,7 @@ public class CsvTestsDataLoader { Map.entry(APPS_SHORT.indexName, APPS_SHORT), Map.entry(LANGUAGES.indexName, LANGUAGES), Map.entry(LANGUAGES_LOOKUP.indexName, LANGUAGES_LOOKUP), + Map.entry(LANGUAGES_LOOKUP_NON_UNIQUE_KEY.indexName, LANGUAGES_LOOKUP_NON_UNIQUE_KEY), Map.entry(UL_LOGS.indexName, UL_LOGS), Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA), Map.entry(MV_SAMPLE_DATA.indexName, MV_SAMPLE_DATA), diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_non_unique_key.csv b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_non_unique_key.csv new file mode 100644 index 0000000000000..1578762f8d1cb --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/languages_non_unique_key.csv @@ -0,0 +1,10 @@ +language_code:integer,language_name:keyword,country:keyword +1,English,Canada +1,English, +1,,United Kingdom +1,English,United States of America +2,German,[Germany,Austria] +2,German,Switzerland +2,German, +4,Quenya, +5,,Atlantis diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec index 74b7a19d06bd6..c39f4ae7b4e0c 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/lookup-join.csv-spec @@ -3,7 +3,6 @@ // Reuses the sample dataset and commands from enrich.csv-spec // -//TODO: this sometimes returns null instead of the looked up value (likely related to the execution order) basicOnTheDataNode required_capability: join_lookup_v5 @@ -102,6 +101,83 @@ emp_no:integer | language_code:integer | language_name:keyword 10003 | 4 | German ; +nonUniqueLeftKeyOnTheDataNode +required_capability: join_lookup_v5 + +FROM employees +| WHERE emp_no <= 10030 +| EVAL language_code = emp_no % 10 +| WHERE language_code < 3 +| LOOKUP JOIN languages_lookup ON language_code +| SORT emp_no +| KEEP emp_no, language_code, language_name +; + +emp_no:integer | language_code:integer | language_name:keyword +10001 |1 | English +10002 |2 | French +10010 |0 | null +10011 |1 | English +10012 |2 | French +10020 |0 | null +10021 |1 | English +10022 |2 | French +10030 |0 | null +; + +nonUniqueRightKeyOnTheDataNode +required_capability: join_lookup_v5 + +FROM employees +| EVAL language_code = emp_no % 10 +| LOOKUP JOIN languages_lookup_non_unique_key ON language_code +| WHERE emp_no > 10090 AND emp_no < 10096 +| SORT emp_no +| EVAL country = MV_SORT(country) +| KEEP emp_no, language_code, language_name, country +; + +emp_no:integer | language_code:integer | language_name:keyword | country:keyword +10091 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America] +10092 | 2 | [German, German, German] | [Austria, Germany, Switzerland] +10093 | 3 | null | null +10094 | 4 | Quenya | null +10095 | 5 | null | Atlantis +; + +nonUniqueRightKeyOnTheCoordinator +required_capability: join_lookup_v5 + +FROM employees +| SORT emp_no +| LIMIT 5 +| EVAL language_code = emp_no % 10 +| LOOKUP JOIN languages_lookup_non_unique_key ON language_code +| EVAL country = MV_SORT(country) +| KEEP emp_no, language_code, language_name, country +; + +emp_no:integer | language_code:integer | language_name:keyword | country:keyword +10001 | 1 | [English, English, English] | [Canada, United Kingdom, United States of America] +10002 | 2 | [German, German, German] | [Austria, Germany, Switzerland] +10003 | 3 | null | null +10004 | 4 | Quenya | null +10005 | 5 | null | Atlantis +; + +nonUniqueRightKeyFromRow +required_capability: join_lookup_v5 + +ROW language_code = 2 +| LOOKUP JOIN languages_lookup_non_unique_key ON language_code +| DROP country.keyword +| EVAL country = MV_SORT(country) +; + +language_code:integer | language_name:keyword | country:keyword +2 | [German, German, German] | [Austria, Germany, Switzerland] +; + lookupIPFromRow required_capability: join_lookup_v5