From deadc1be87d823e90c5d8efb6ecc7d1f8daa75f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Fri, 2 Feb 2024 11:14:45 +0100 Subject: [PATCH] feat(api): support the inner join convenience to not repeat fields known to be equal (#8127) Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> --- ibis/backends/polars/tests/test_join.py | 1 - .../test_many_subqueries/bigquery/out.sql | 68 +++--- .../test_many_subqueries/clickhouse/out.sql | 67 +++--- .../test_many_subqueries/datafusion/out.sql | 67 +++--- .../test_many_subqueries/druid/out.sql | 42 ++++ .../test_many_subqueries/duckdb/out.sql | 67 +++--- .../test_many_subqueries/exasol/out.sql | 42 ++++ .../test_many_subqueries/impala/out.sql | 70 +++--- .../test_many_subqueries/mssql/out.sql | 66 +++--- .../test_many_subqueries/mysql/out.sql | 66 +++--- .../test_many_subqueries/oracle/out.sql | 66 +++--- .../test_many_subqueries/postgres/out.sql | 66 +++--- .../test_many_subqueries/pyspark/out.sql | 42 ++++ .../test_many_subqueries/snowflake/out.sql | 67 +++--- .../test_many_subqueries/trino/out.sql | 66 +++--- .../bigquery/out.sql | 32 ++- .../clickhouse/out.sql | 46 ++-- .../datafusion/out.sql | 56 ++--- .../test_cte_refs_in_topo_order/druid/out.sql | 20 ++ .../duckdb/out.sql | 48 ++-- .../exasol/out.sql | 20 ++ .../impala/out.sql | 32 ++- .../test_cte_refs_in_topo_order/mssql/out.sql | 36 +-- .../test_cte_refs_in_topo_order/mysql/out.sql | 34 ++- .../oracle/out.sql | 34 ++- .../postgres/out.sql | 32 ++- .../pyspark/out.sql | 20 ++ .../snowflake/out.sql | 48 ++-- .../test_cte_refs_in_topo_order/trino/out.sql | 32 ++- ibis/backends/tests/test_generic.py | 16 +- ibis/backends/tests/test_sql.py | 5 +- ibis/common/egraph.py | 13 ++ ibis/common/tests/test_egraph.py | 7 + .../test_table_count_expr/join_repr.txt | 1 - ibis/expr/tests/test_newrels.py | 138 +++++++++++- ibis/expr/types/joins.py | 209 +++++++++++++----- ibis/tests/expr/test_table.py | 78 +++---- 37 files changed, 
1058 insertions(+), 762 deletions(-) create mode 100644 ibis/backends/tests/snapshots/test_generic/test_many_subqueries/druid/out.sql create mode 100644 ibis/backends/tests/snapshots/test_generic/test_many_subqueries/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_generic/test_many_subqueries/pyspark/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/druid/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/exasol/out.sql create mode 100644 ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/pyspark/out.sql diff --git a/ibis/backends/polars/tests/test_join.py b/ibis/backends/polars/tests/test_join.py index 51a7295dd924..26667c808981 100644 --- a/ibis/backends/polars/tests/test_join.py +++ b/ibis/backends/polars/tests/test_join.py @@ -17,7 +17,6 @@ def test_memtable_join(con): "x": [1, 2, 3], "y": [4, 5, 6], "z": ["a", "b", "c"], - "x_right": [1, 2, 3], "y_right": [9, 8, 7], "z_right": ["f", "e", "d"], } diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/bigquery/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/bigquery/out.sql index 07af57981776..85f1c3cc6b78 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/bigquery/out.sql @@ -1,36 +1,42 @@ -WITH t0 AS ( +WITH t6 AS ( SELECT - t5.*, - ( - row_number() OVER (ORDER BY t5.`street` ASC) - 1 - ) AS `key` - FROM data AS t5 + t5.street, + ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS key + FROM ( + SELECT + t2.street, + t2.key + FROM ( + SELECT + t0.street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC) - 1 AS key + FROM data AS t0 + ) AS t2 + INNER JOIN ( + SELECT + t1.key + FROM ( + SELECT + t0.street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC) - 1 AS key + FROM data AS t0 + ) AS t1 + ) AS t4 + ON t2.key = t4.key + ) AS t5 ), t1 AS ( 
SELECT - t0.`key` - FROM t0 -), t2 AS ( - SELECT - t0.`street`, - t0.`key` - FROM t0 - INNER JOIN t1 - ON t0.`key` = t1.`key` -), t3 AS ( - SELECT - t2.`street`, - ( - row_number() OVER (ORDER BY t2.`street` ASC) - 1 - ) AS `key` - FROM t2 -), t4 AS ( - SELECT - t3.`key` - FROM t3 + t0.street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC) - 1 AS key + FROM data AS t0 ) SELECT - t3.`street`, - t3.`key` -FROM t3 -INNER JOIN t4 - ON t3.`key` = t4.`key` \ No newline at end of file + t8.street, + t8.key +FROM t6 AS t8 +INNER JOIN ( + SELECT + t7.key + FROM t6 AS t7 +) AS t10 + ON t8.key = t10.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql index e472a5727fab..fca431bc4c45 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/clickhouse/out.sql @@ -1,55 +1,42 @@ -SELECT - t5.street AS street, - t5.key AS key, - t5.key_right AS key_right -FROM ( - SELECT - t1.street AS street, - ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key, - t3.key AS key_right - FROM ( - SELECT - t0.street AS street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key - FROM data AS t0 - ) AS t1 - INNER JOIN ( - SELECT - t1.key AS key - FROM ( - SELECT - t0.street AS street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key - FROM data AS t0 - ) AS t1 - ) AS t3 - ON t1.key = t3.key -) AS t5 -INNER JOIN ( +WITH t6 AS ( SELECT - t5.key AS key + t5.street, + ROW_NUMBER() OVER (ORDER BY t5.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key FROM ( SELECT - t1.street AS street, - ROW_NUMBER() OVER (ORDER BY t1.street ASC 
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key, - t3.key AS key_right + t2.street, + t2.key FROM ( SELECT - t0.street AS street, + t0.street, ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key FROM data AS t0 - ) AS t1 + ) AS t2 INNER JOIN ( SELECT - t1.key AS key + t1.key FROM ( SELECT - t0.street AS street, + t0.street, ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key FROM data AS t0 ) AS t1 - ) AS t3 - ON t1.key = t3.key + ) AS t4 + ON t2.key = t4.key ) AS t5 -) AS t7 - ON t5.key = t7.key \ No newline at end of file +), t1 AS ( + SELECT + t0.street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS key + FROM data AS t0 +) +SELECT + t8.street, + t8.key +FROM t6 AS t8 +INNER JOIN ( + SELECT + t7.key + FROM t6 AS t7 +) AS t10 + ON t8.key = t10.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql index f7cf54e9de51..64a6e78e8bf3 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/datafusion/out.sql @@ -1,55 +1,42 @@ -SELECT - "t5"."street" AS "street", - "t5"."key" AS "key", - "t5"."key_right" AS "key_right" -FROM ( - SELECT - "t1"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key", - "t2"."key" AS "key_right" - FROM ( - SELECT - "t0"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" - FROM "data" AS "t0" - ) AS "t1" - INNER JOIN ( - SELECT - "t1"."key" AS "key" - FROM ( - SELECT - "t0"."street" AS "street", - 
ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" - FROM "data" AS "t0" - ) AS "t1" - ) AS "t2" - ON "t1"."key" = "t2"."key" -) AS "t5" -INNER JOIN ( +WITH "t6" AS ( SELECT - "t5"."key" AS "key" + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM ( SELECT - "t1"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key", - "t2"."key" AS "key_right" + "t2"."street", + "t2"."key" FROM ( SELECT - "t0"."street" AS "street", + "t0"."street", ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM "data" AS "t0" - ) AS "t1" + ) AS "t2" INNER JOIN ( SELECT - "t1"."key" AS "key" + "t1"."key" FROM ( SELECT - "t0"."street" AS "street", + "t0"."street", ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM "data" AS "t0" ) AS "t1" - ) AS "t2" - ON "t1"."key" = "t2"."key" + ) AS "t4" + ON "t2"."key" = "t4"."key" ) AS "t5" -) AS "t6" - ON "t5"."key" = "t6"."key" \ No newline at end of file +), "t1" AS ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" +) +SELECT + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/druid/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/druid/out.sql new file mode 100644 index 000000000000..64a6e78e8bf3 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/druid/out.sql @@ -0,0 +1,42 @@ +WITH "t6" AS ( + SELECT + 
"t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM ( + SELECT + "t2"."street", + "t2"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t4" + ON "t2"."key" = "t4"."key" + ) AS "t5" +), "t1" AS ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" +) +SELECT + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql index 2a699a186d7d..2e5f7d14030f 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/duckdb/out.sql @@ -1,55 +1,42 @@ -SELECT - t5.street AS street, - t5.key AS key, - t5.key_right AS key_right -FROM ( - SELECT - t1.street AS street, - ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key, - t3.key AS key_right - FROM ( - SELECT - t0.street AS street, - ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key - FROM data AS t0 - ) AS t1 - INNER JOIN ( - SELECT - t1.key AS key - FROM ( - SELECT - t0.street AS street, - ROW_NUMBER() OVER (ORDER BY 
t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key - FROM data AS t0 - ) AS t1 - ) AS t3 - ON t1.key = t3.key -) AS t5 -INNER JOIN ( +WITH t6 AS ( SELECT - t5.key AS key + t5.street, + ROW_NUMBER() OVER (ORDER BY t5.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key FROM ( SELECT - t1.street AS street, - ROW_NUMBER() OVER (ORDER BY t1.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key, - t3.key AS key_right + t2.street, + t2.key FROM ( SELECT - t0.street AS street, + t0.street, ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key FROM data AS t0 - ) AS t1 + ) AS t2 INNER JOIN ( SELECT - t1.key AS key + t1.key FROM ( SELECT - t0.street AS street, + t0.street, ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key FROM data AS t0 ) AS t1 - ) AS t3 - ON t1.key = t3.key + ) AS t4 + ON t2.key = t4.key ) AS t5 -) AS t7 - ON t5.key = t7.key \ No newline at end of file +), t1 AS ( + SELECT + t0.street, + ROW_NUMBER() OVER (ORDER BY t0.street ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - CAST(1 AS TINYINT) AS key + FROM data AS t0 +) +SELECT + t8.street, + t8.key +FROM t6 AS t8 +INNER JOIN ( + SELECT + t7.key + FROM t6 AS t7 +) AS t10 + ON t8.key = t10.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/exasol/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/exasol/out.sql new file mode 100644 index 000000000000..c2670a045cce --- /dev/null +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/exasol/out.sql @@ -0,0 +1,42 @@ +WITH "t6" AS ( + SELECT + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC) - 1 AS "key" + FROM ( + SELECT + "t2"."street", + "t2"."key" + 
FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t4" + ON "t2"."key" = "t4"."key" + ) AS "t5" +), "t1" AS ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC) - 1 AS "key" + FROM "data" AS "t0" +) +SELECT + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/impala/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/impala/out.sql index eaec992e3f55..208dc189381d 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/impala/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/impala/out.sql @@ -1,36 +1,42 @@ -WITH t0 AS ( +WITH `t6` AS ( SELECT - t5.*, - ( - ROW_NUMBER() OVER (ORDER BY t5.`street` ASC) - 1 - ) AS `key` - FROM `data` AS t5 -), t1 AS ( + `t5`.`street`, + ROW_NUMBER() OVER (ORDER BY `t5`.`street` ASC NULLS LAST) - 1 AS `key` + FROM ( + SELECT + `t2`.`street`, + `t2`.`key` + FROM ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + FROM `data` AS `t0` + ) AS `t2` + INNER JOIN ( + SELECT + `t1`.`key` + FROM ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + FROM `data` AS `t0` + ) AS `t1` + ) AS `t4` + ON `t2`.`key` = `t4`.`key` + ) AS `t5` +), `t1` AS ( SELECT - t0.`key` - FROM t0 -), t2 AS ( - SELECT - t0.`street`, - t0.`key` - FROM t0 - INNER JOIN t1 - ON t0.`key` = t1.`key` -), t3 AS ( - SELECT - t2.`street`, - ( - ROW_NUMBER() OVER (ORDER BY t2.`street` ASC) - 1 - ) AS `key` - FROM t2 -), t4 AS ( - SELECT - 
t3.`key` - FROM t3 + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + FROM `data` AS `t0` ) SELECT - t3.`street`, - t3.`key` -FROM t3 -INNER JOIN t4 - ON t3.`key` = t4.`key` \ No newline at end of file + `t8`.`street`, + `t8`.`key` +FROM `t6` AS `t8` +INNER JOIN ( + SELECT + `t7`.`key` + FROM `t6` AS `t7` +) AS `t10` + ON `t8`.`key` = `t10`.`key` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mssql/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mssql/out.sql index 3dc5e59da76a..b20ffa2875e7 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mssql/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mssql/out.sql @@ -1,32 +1,42 @@ -WITH t0 AS ( +WITH [t6] AS ( SELECT - t5.street AS street, - ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS [key] - FROM data AS t5 -), t1 AS ( + [t5].[street] AS [street], + ROW_NUMBER() OVER (ORDER BY CASE WHEN [t5].[street] IS NULL THEN 1 ELSE 0 END, [t5].[street] ASC) - 1 AS [key] + FROM ( + SELECT + [t2].[street] AS [street], + [t2].[key] AS [key] + FROM ( + SELECT + [t0].[street] AS [street], + ROW_NUMBER() OVER (ORDER BY CASE WHEN [t0].[street] IS NULL THEN 1 ELSE 0 END, [t0].[street] ASC) - 1 AS [key] + FROM [data] AS [t0] + ) AS [t2] + INNER JOIN ( + SELECT + [t1].[key] AS [key] + FROM ( + SELECT + [t0].[street] AS [street], + ROW_NUMBER() OVER (ORDER BY CASE WHEN [t0].[street] IS NULL THEN 1 ELSE 0 END, [t0].[street] ASC) - 1 AS [key] + FROM [data] AS [t0] + ) AS [t1] + ) AS [t4] + ON [t2].[key] = [t4].[key] + ) AS [t5] +), [t1] AS ( SELECT - t0.[key] AS [key] - FROM t0 -), t2 AS ( - SELECT - t0.street AS street, - t0.[key] AS [key] - FROM t0 - JOIN t1 - ON t0.[key] = t1.[key] -), t3 AS ( - SELECT - t2.street AS street, - ROW_NUMBER() OVER (ORDER BY t2.street ASC) - 1 AS [key] - FROM t2 -), t4 AS ( - SELECT - t3.[key] AS [key] - FROM t3 + [t0].[street] 
AS [street], + ROW_NUMBER() OVER (ORDER BY CASE WHEN [t0].[street] IS NULL THEN 1 ELSE 0 END, [t0].[street] ASC) - 1 AS [key] + FROM [data] AS [t0] ) SELECT - t3.street, - t3.[key] -FROM t3 -JOIN t4 - ON t3.[key] = t4.[key] \ No newline at end of file + [t8].[street], + [t8].[key] +FROM [t6] AS [t8] +INNER JOIN ( + SELECT + [t7].[key] AS [key] + FROM [t6] AS [t7] +) AS [t10] + ON [t8].[key] = [t10].[key] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mysql/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mysql/out.sql index e0ed4bcbb100..f5252710ab98 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mysql/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mysql/out.sql @@ -1,32 +1,42 @@ -WITH t0 AS ( +WITH `t6` AS ( SELECT - t5.street AS street, - ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS `key` - FROM data AS t5 -), t1 AS ( + `t5`.`street`, + ROW_NUMBER() OVER (ORDER BY CASE WHEN `t5`.`street` IS NULL THEN 1 ELSE 0 END, `t5`.`street` ASC) - 1 AS `key` + FROM ( + SELECT + `t2`.`street`, + `t2`.`key` + FROM ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY CASE WHEN `t0`.`street` IS NULL THEN 1 ELSE 0 END, `t0`.`street` ASC) - 1 AS `key` + FROM `data` AS `t0` + ) AS `t2` + INNER JOIN ( + SELECT + `t1`.`key` + FROM ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY CASE WHEN `t0`.`street` IS NULL THEN 1 ELSE 0 END, `t0`.`street` ASC) - 1 AS `key` + FROM `data` AS `t0` + ) AS `t1` + ) AS `t4` + ON `t2`.`key` = `t4`.`key` + ) AS `t5` +), `t1` AS ( SELECT - t0.`key` AS `key` - FROM t0 -), t2 AS ( - SELECT - t0.street AS street, - t0.`key` AS `key` - FROM t0 - INNER JOIN t1 - ON t0.`key` = t1.`key` -), t3 AS ( - SELECT - t2.street AS street, - ROW_NUMBER() OVER (ORDER BY t2.street ASC) - 1 AS `key` - FROM t2 -), t4 AS ( - SELECT - t3.`key` AS `key` - FROM t3 + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY CASE WHEN 
`t0`.`street` IS NULL THEN 1 ELSE 0 END, `t0`.`street` ASC) - 1 AS `key` + FROM `data` AS `t0` ) SELECT - t3.street, - t3.`key` -FROM t3 -INNER JOIN t4 - ON t3.`key` = t4.`key` \ No newline at end of file + `t8`.`street`, + `t8`.`key` +FROM `t6` AS `t8` +INNER JOIN ( + SELECT + `t7`.`key` + FROM `t6` AS `t7` +) AS `t10` + ON `t8`.`key` = `t10`.`key` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/oracle/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/oracle/out.sql index 9459ded5586f..bb10644c1dad 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/oracle/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/oracle/out.sql @@ -1,32 +1,42 @@ -WITH t0 AS ( +WITH "t6" AS ( SELECT - t5."street" AS "street", - ROW_NUMBER() OVER (ORDER BY t5."street" ASC) - 1 AS "key" - FROM "data" t5 -), t1 AS ( + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC NULLS LAST) - 1 AS "key" + FROM ( + SELECT + "t2"."street", + "t2"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC NULLS LAST) - 1 AS "key" + FROM "data" "t0" + ) "t2" + INNER JOIN ( + SELECT + "t1"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC NULLS LAST) - 1 AS "key" + FROM "data" "t0" + ) "t1" + ) "t4" + ON "t2"."key" = "t4"."key" + ) "t5" +), "t1" AS ( SELECT - t0."key" AS "key" - FROM t0 -), t2 AS ( - SELECT - t0."street" AS "street", - t0."key" AS "key" - FROM t0 - JOIN t1 - ON t0."key" = t1."key" -), t3 AS ( - SELECT - t2."street" AS "street", - ROW_NUMBER() OVER (ORDER BY t2."street" ASC) - 1 AS "key" - FROM t2 -), t4 AS ( - SELECT - t3."key" AS "key" - FROM t3 + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC NULLS LAST) - 1 AS "key" + FROM "data" "t0" ) SELECT - t3."street", - t3."key" -FROM t3 -JOIN t4 - ON t3."key" = t4."key" \ No newline at end of file + "t8"."street", + 
"t8"."key" +FROM "t6" "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" "t7" +) "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/postgres/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/postgres/out.sql index fb8c40cd69ba..64a6e78e8bf3 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/postgres/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/postgres/out.sql @@ -1,32 +1,42 @@ -WITH t0 AS ( +WITH "t6" AS ( SELECT - t5.street AS street, - ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS key - FROM data AS t5 -), t1 AS ( + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM ( + SELECT + "t2"."street", + "t2"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t4" + ON "t2"."key" = "t4"."key" + ) AS "t5" +), "t1" AS ( SELECT - t0.key AS key - FROM t0 -), t2 AS ( - SELECT - t0.street AS street, - t0.key AS key - FROM t0 - JOIN t1 - ON t0.key = t1.key -), t3 AS ( - SELECT - t2.street AS street, - ROW_NUMBER() OVER (ORDER BY t2.street ASC) - 1 AS key - FROM t2 -), t4 AS ( - SELECT - t3.key AS key - FROM t3 + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" ) SELECT - t3.street, - t3.key -FROM t3 -JOIN t4 - ON t3.key = t4.key \ No newline at end of file + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + 
FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/pyspark/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/pyspark/out.sql new file mode 100644 index 000000000000..208dc189381d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/pyspark/out.sql @@ -0,0 +1,42 @@ +WITH `t6` AS ( + SELECT + `t5`.`street`, + ROW_NUMBER() OVER (ORDER BY `t5`.`street` ASC NULLS LAST) - 1 AS `key` + FROM ( + SELECT + `t2`.`street`, + `t2`.`key` + FROM ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + FROM `data` AS `t0` + ) AS `t2` + INNER JOIN ( + SELECT + `t1`.`key` + FROM ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + FROM `data` AS `t0` + ) AS `t1` + ) AS `t4` + ON `t2`.`key` = `t4`.`key` + ) AS `t5` +), `t1` AS ( + SELECT + `t0`.`street`, + ROW_NUMBER() OVER (ORDER BY `t0`.`street` ASC NULLS LAST) - 1 AS `key` + FROM `data` AS `t0` +) +SELECT + `t8`.`street`, + `t8`.`key` +FROM `t6` AS `t8` +INNER JOIN ( + SELECT + `t7`.`key` + FROM `t6` AS `t7` +) AS `t10` + ON `t8`.`key` = `t10`.`key` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql index d63129cc6985..64a6e78e8bf3 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/snowflake/out.sql @@ -1,55 +1,42 @@ -SELECT - "t5"."street" AS "street", - "t5"."key" AS "key", - "t5"."key_right" AS "key_right" -FROM ( - SELECT - "t1"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key", - "t3"."key" AS "key_right" - FROM 
( - SELECT - "t0"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" - FROM "data" AS "t0" - ) AS "t1" - INNER JOIN ( - SELECT - "t1"."key" AS "key" - FROM ( - SELECT - "t0"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" - FROM "data" AS "t0" - ) AS "t1" - ) AS "t3" - ON "t1"."key" = "t3"."key" -) AS "t5" -INNER JOIN ( +WITH "t6" AS ( SELECT - "t5"."key" AS "key" + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM ( SELECT - "t1"."street" AS "street", - ROW_NUMBER() OVER (ORDER BY "t1"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key", - "t3"."key" AS "key_right" + "t2"."street", + "t2"."key" FROM ( SELECT - "t0"."street" AS "street", + "t0"."street", ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM "data" AS "t0" - ) AS "t1" + ) AS "t2" INNER JOIN ( SELECT - "t1"."key" AS "key" + "t1"."key" FROM ( SELECT - "t0"."street" AS "street", + "t0"."street", ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" FROM "data" AS "t0" ) AS "t1" - ) AS "t3" - ON "t1"."key" = "t3"."key" + ) AS "t4" + ON "t2"."key" = "t4"."key" ) AS "t5" -) AS "t7" - ON "t5"."key" = "t7"."key" \ No newline at end of file +), "t1" AS ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" +) +SELECT + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git 
a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/trino/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/trino/out.sql index fb8c40cd69ba..64a6e78e8bf3 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/trino/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/trino/out.sql @@ -1,32 +1,42 @@ -WITH t0 AS ( +WITH "t6" AS ( SELECT - t5.street AS street, - ROW_NUMBER() OVER (ORDER BY t5.street ASC) - 1 AS key - FROM data AS t5 -), t1 AS ( + "t5"."street", + ROW_NUMBER() OVER (ORDER BY "t5"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM ( + SELECT + "t2"."street", + "t2"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t2" + INNER JOIN ( + SELECT + "t1"."key" + FROM ( + SELECT + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" + ) AS "t1" + ) AS "t4" + ON "t2"."key" = "t4"."key" + ) AS "t5" +), "t1" AS ( SELECT - t0.key AS key - FROM t0 -), t2 AS ( - SELECT - t0.street AS street, - t0.key AS key - FROM t0 - JOIN t1 - ON t0.key = t1.key -), t3 AS ( - SELECT - t2.street AS street, - ROW_NUMBER() OVER (ORDER BY t2.street ASC) - 1 AS key - FROM t2 -), t4 AS ( - SELECT - t3.key AS key - FROM t3 + "t0"."street", + ROW_NUMBER() OVER (ORDER BY "t0"."street" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - 1 AS "key" + FROM "data" AS "t0" ) SELECT - t3.street, - t3.key -FROM t3 -JOIN t4 - ON t3.key = t4.key \ No newline at end of file + "t8"."street", + "t8"."key" +FROM "t6" AS "t8" +INNER JOIN ( + SELECT + "t7"."key" + FROM "t6" AS "t7" +) AS "t10" + ON "t8"."key" = "t10"."key" \ No newline at end of file diff --git 
a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/bigquery/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/bigquery/out.sql index 47f235ccccf8..d0b7a174d49a 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/bigquery/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/bigquery/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH t1 AS ( SELECT - t4.* - FROM leaf AS t4 + t0.key + FROM leaf AS t0 WHERE TRUE -), t1 AS ( - SELECT - t0.`key` - FROM t0 -), t2 AS ( - SELECT - t0.`key` - FROM t0 - INNER JOIN t1 - ON t0.`key` = t1.`key` ) SELECT - t2.`key` -FROM t2 -INNER JOIN t2 AS t3 - ON t2.`key` = t3.`key` \ No newline at end of file + t3.key +FROM t1 AS t3 +INNER JOIN t1 AS t4 + ON t3.key = t4.key +INNER JOIN ( + SELECT + t3.key + FROM t1 AS t3 + INNER JOIN t1 AS t4 + ON t3.key = t4.key +) AS t6 + ON t3.key = t6.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql index 6dfef25abe9f..d0b7a174d49a 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/clickhouse/out.sql @@ -1,40 +1,20 @@ -SELECT - t2.key AS key, - t3.key AS key_right, - t6.key_right AS key_right_right -FROM ( +WITH t1 AS ( SELECT - t0.key AS key + t0.key FROM leaf AS t0 WHERE TRUE -) AS t2 -INNER JOIN ( - SELECT - t0.key AS key - FROM leaf AS t0 - WHERE - TRUE -) AS t3 - ON t2.key = t3.key +) +SELECT + t3.key +FROM t1 AS t3 +INNER JOIN t1 AS t4 + ON t3.key = t4.key INNER JOIN ( SELECT - t2.key AS key, - t3.key AS key_right - FROM ( - SELECT - t0.key AS key - FROM leaf AS t0 - WHERE - TRUE - ) AS t2 - INNER JOIN ( - SELECT - t0.key AS key - FROM leaf AS t0 - WHERE - TRUE - ) AS t3 - ON t2.key = t3.key + t3.key + FROM t1 AS t3 + 
INNER JOIN t1 AS t4 + ON t3.key = t4.key ) AS t6 - ON t6.key = t6.key \ No newline at end of file + ON t3.key = t6.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql index 96acd49caaad..3cccc7356173 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/datafusion/out.sql @@ -1,48 +1,20 @@ -SELECT - "t1"."key" AS "key", - "t2"."key" AS "key_right", - "t4"."key_right" AS "key_right_right" -FROM ( +WITH "t1" AS ( SELECT - * + "t0"."key" FROM "leaf" AS "t0" WHERE TRUE -) AS "t1" -INNER JOIN ( - SELECT - "t1"."key" AS "key" - FROM ( - SELECT - * - FROM "leaf" AS "t0" - WHERE - TRUE - ) AS "t1" -) AS "t2" - ON "t1"."key" = "t2"."key" +) +SELECT + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" INNER JOIN ( SELECT - "t1"."key" AS "key", - "t2"."key" AS "key_right" - FROM ( - SELECT - * - FROM "leaf" AS "t0" - WHERE - TRUE - ) AS "t1" - INNER JOIN ( - SELECT - "t1"."key" AS "key" - FROM ( - SELECT - * - FROM "leaf" AS "t0" - WHERE - TRUE - ) AS "t1" - ) AS "t2" - ON "t1"."key" = "t2"."key" -) AS "t4" - ON "t1"."key" = "t1"."key" \ No newline at end of file + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/druid/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/druid/out.sql new file mode 100644 index 000000000000..3cccc7356173 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/druid/out.sql @@ -0,0 +1,20 @@ +WITH "t1" AS ( + SELECT + "t0"."key" + FROM "leaf" AS "t0" + WHERE + TRUE +) +SELECT + "t3"."key" +FROM 
"t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +INNER JOIN ( + SELECT + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql index fb2ee62190b5..d0b7a174d49a 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/duckdb/out.sql @@ -1,40 +1,20 @@ -SELECT - t1.key AS key, - t2.key AS key_right, - t5.key_right AS key_right_right -FROM ( +WITH t1 AS ( SELECT - t0.key AS key + t0.key FROM leaf AS t0 WHERE TRUE -) AS t1 -INNER JOIN ( - SELECT - t0.key AS key - FROM leaf AS t0 - WHERE - TRUE -) AS t2 - ON t1.key = t2.key +) +SELECT + t3.key +FROM t1 AS t3 +INNER JOIN t1 AS t4 + ON t3.key = t4.key INNER JOIN ( SELECT - t1.key AS key, - t2.key AS key_right - FROM ( - SELECT - t0.key AS key - FROM leaf AS t0 - WHERE - TRUE - ) AS t1 - INNER JOIN ( - SELECT - t0.key AS key - FROM leaf AS t0 - WHERE - TRUE - ) AS t2 - ON t1.key = t2.key -) AS t5 - ON t1.key = t5.key \ No newline at end of file + t3.key + FROM t1 AS t3 + INNER JOIN t1 AS t4 + ON t3.key = t4.key +) AS t6 + ON t3.key = t6.key \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/exasol/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/exasol/out.sql new file mode 100644 index 000000000000..3cccc7356173 --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/exasol/out.sql @@ -0,0 +1,20 @@ +WITH "t1" AS ( + SELECT + "t0"."key" + FROM "leaf" AS "t0" + WHERE + TRUE +) +SELECT + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +INNER JOIN ( + SELECT + "t3"."key" + 
FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/impala/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/impala/out.sql index b5eb154f064d..8d13c9ddda1b 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/impala/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/impala/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH `t1` AS ( SELECT - t4.* - FROM `leaf` AS t4 + `t0`.`key` + FROM `leaf` AS `t0` WHERE TRUE -), t1 AS ( - SELECT - t0.`key` - FROM t0 -), t2 AS ( - SELECT - t0.`key` - FROM t0 - INNER JOIN t1 - ON t0.`key` = t1.`key` ) SELECT - t2.`key` -FROM t2 -INNER JOIN t2 AS t3 - ON t2.`key` = t3.`key` \ No newline at end of file + `t3`.`key` +FROM `t1` AS `t3` +INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +INNER JOIN ( + SELECT + `t3`.`key` + FROM `t1` AS `t3` + INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +) AS `t6` + ON `t3`.`key` = `t6`.`key` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mssql/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mssql/out.sql index dbe9900fb111..217eafe26f55 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mssql/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mssql/out.sql @@ -1,22 +1,22 @@ -WITH t0 AS ( +WITH [t1] AS ( SELECT - t4.[key] AS [key] - FROM leaf AS t4 + [t0].[key] AS [key] + FROM [leaf] AS [t0] WHERE - 1 = 1 -), t1 AS ( - SELECT - t0.[key] AS [key] - FROM t0 -), t2 AS ( - SELECT - t0.[key] AS [key] - FROM t0 - JOIN t1 - ON t0.[key] = t1.[key] + ( + 1 = 1 + ) ) SELECT - t2.[key] -FROM t2 -JOIN t2 AS t3 - ON t2.[key] = t3.[key] \ No newline at end of file + [t3].[key] +FROM [t1] AS [t3] +INNER JOIN [t1] 
AS [t4] + ON [t3].[key] = [t4].[key] +INNER JOIN ( + SELECT + [t3].[key] AS [key] + FROM [t1] AS [t3] + INNER JOIN [t1] AS [t4] + ON [t3].[key] = [t4].[key] +) AS [t6] + ON [t3].[key] = [t6].[key] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mysql/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mysql/out.sql index b3bba37ea6d9..8d13c9ddda1b 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mysql/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mysql/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH `t1` AS ( SELECT - t4.`key` AS `key` - FROM leaf AS t4 + `t0`.`key` + FROM `leaf` AS `t0` WHERE - TRUE = 1 -), t1 AS ( - SELECT - t0.`key` AS `key` - FROM t0 -), t2 AS ( - SELECT - t0.`key` AS `key` - FROM t0 - INNER JOIN t1 - ON t0.`key` = t1.`key` + TRUE ) SELECT - t2.`key` -FROM t2 -INNER JOIN t2 AS t3 - ON t2.`key` = t3.`key` \ No newline at end of file + `t3`.`key` +FROM `t1` AS `t3` +INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +INNER JOIN ( + SELECT + `t3`.`key` + FROM `t1` AS `t3` + INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +) AS `t6` + ON `t3`.`key` = `t6`.`key` \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/oracle/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/oracle/out.sql index 1a5051a047b2..8a330a708765 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/oracle/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/oracle/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH "t1" AS ( SELECT - t4."key" AS "key" - FROM "leaf" t4 + "t0"."key" + FROM "leaf" "t0" WHERE - 1 = 1 -), t1 AS ( - SELECT - t0."key" AS "key" - FROM t0 -), t2 AS ( - SELECT - t0."key" AS "key" - FROM t0 - JOIN t1 - ON t0."key" = t1."key" + TRUE ) SELECT - t2."key" -FROM t2 -JOIN t2 t3 - 
ON t2."key" = t3."key" \ No newline at end of file + "t3"."key" +FROM "t1" "t3" +INNER JOIN "t1" "t4" + ON "t3"."key" = "t4"."key" +INNER JOIN ( + SELECT + "t3"."key" + FROM "t1" "t3" + INNER JOIN "t1" "t4" + ON "t3"."key" = "t4"."key" +) "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/postgres/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/postgres/out.sql index 8d5d47b6920b..3cccc7356173 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/postgres/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/postgres/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH "t1" AS ( SELECT - t4.key AS key - FROM leaf AS t4 + "t0"."key" + FROM "leaf" AS "t0" WHERE TRUE -), t1 AS ( - SELECT - t0.key AS key - FROM t0 -), t2 AS ( - SELECT - t0.key AS key - FROM t0 - JOIN t1 - ON t0.key = t1.key ) SELECT - t2.key -FROM t2 -JOIN t2 AS t3 - ON t2.key = t3.key \ No newline at end of file + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +INNER JOIN ( + SELECT + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/pyspark/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/pyspark/out.sql new file mode 100644 index 000000000000..8d13c9ddda1b --- /dev/null +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/pyspark/out.sql @@ -0,0 +1,20 @@ +WITH `t1` AS ( + SELECT + `t0`.`key` + FROM `leaf` AS `t0` + WHERE + TRUE +) +SELECT + `t3`.`key` +FROM `t1` AS `t3` +INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +INNER JOIN ( + SELECT + `t3`.`key` + FROM `t1` AS `t3` + INNER JOIN `t1` AS `t4` + ON `t3`.`key` = `t4`.`key` +) AS `t6` + ON `t3`.`key` = `t6`.`key` \ 
No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql index eb9acf0a45fe..3cccc7356173 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/snowflake/out.sql @@ -1,40 +1,20 @@ -SELECT - "t1"."key" AS "key", - "t2"."key" AS "key_right", - "t5"."key_right" AS "key_right_right" -FROM ( +WITH "t1" AS ( SELECT - "t0"."key" AS "key" + "t0"."key" FROM "leaf" AS "t0" WHERE TRUE -) AS "t1" -INNER JOIN ( - SELECT - "t0"."key" AS "key" - FROM "leaf" AS "t0" - WHERE - TRUE -) AS "t2" - ON "t1"."key" = "t2"."key" +) +SELECT + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" INNER JOIN ( SELECT - "t1"."key" AS "key", - "t2"."key" AS "key_right" - FROM ( - SELECT - "t0"."key" AS "key" - FROM "leaf" AS "t0" - WHERE - TRUE - ) AS "t1" - INNER JOIN ( - SELECT - "t0"."key" AS "key" - FROM "leaf" AS "t0" - WHERE - TRUE - ) AS "t2" - ON "t1"."key" = "t2"."key" -) AS "t5" - ON "t1"."key" = "t5"."key" \ No newline at end of file + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/trino/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/trino/out.sql index 8d5d47b6920b..3cccc7356173 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/trino/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/trino/out.sql @@ -1,22 +1,20 @@ -WITH t0 AS ( +WITH "t1" AS ( SELECT - t4.key AS key - FROM leaf AS t4 + "t0"."key" + FROM "leaf" AS "t0" WHERE TRUE -), t1 AS ( - SELECT - t0.key AS key - FROM t0 -), t2 AS ( - SELECT - t0.key AS key - 
FROM t0 - JOIN t1 - ON t0.key = t1.key ) SELECT - t2.key -FROM t2 -JOIN t2 AS t3 - ON t2.key = t3.key \ No newline at end of file + "t3"."key" +FROM "t1" AS "t3" +INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +INNER JOIN ( + SELECT + "t3"."key" + FROM "t1" AS "t3" + INNER JOIN "t1" AS "t4" + ON "t3"."key" = "t4"."key" +) AS "t6" + ON "t3"."key" = "t6"."key" \ No newline at end of file diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index a685628c4c1d..60df8407803b 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -992,24 +992,12 @@ def test_memtable_column_naming_mismatch(backend, con, monkeypatch, df, columns) ibis.memtable(df, columns=columns) -@pytest.mark.xfail( - raises=com.IntegrityError, reason="inner join convenience not implemented" -) @pytest.mark.notimpl( - ["dask", "datafusion", "pandas", "polars"], + ["dask", "pandas", "polars"], raises=NotImplementedError, reason="not a SQL backend", ) -@pytest.mark.notimpl( - ["pyspark"], reason="pyspark doesn't generate SQL", raises=NotImplementedError -) -@pytest.mark.notimpl(["druid", "flink"], reason="no sqlglot dialect", raises=ValueError) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl( - ["risingwave"], - raises=ValueError, - reason="risingwave doesn't support sqlglot.dialects.dialect.Dialect", -) +@pytest.mark.notimpl(["flink"], reason="no sqlglot dialect", raises=ValueError) def test_many_subqueries(con, snapshot): def query(t, group_cols): t2 = t.mutate(key=ibis.row_number().over(ibis.window(order_by=group_cols))) diff --git a/ibis/backends/tests/test_sql.py b/ibis/backends/tests/test_sql.py index 0db31eb5662d..a9d526c30146 100644 --- a/ibis/backends/tests/test_sql.py +++ b/ibis/backends/tests/test_sql.py @@ -83,10 +83,7 @@ def test_group_by_has_index(backend, snapshot): snapshot.assert_match(sql, "out.sql") -@pytest.mark.xfail( - raises=exc.IntegrityError, reason="inner 
join convenience not implemented" -) -@pytest.mark.never(["pandas", "dask", "polars", "pyspark"], reason="not SQL") +@pytest.mark.never(["pandas", "dask", "polars"], reason="not SQL") def test_cte_refs_in_topo_order(backend, snapshot): mr0 = ibis.table(schema=ibis.schema(dict(key="int")), name="leaf") diff --git a/ibis/common/egraph.py b/ibis/common/egraph.py index ea18870ab401..764ac890b1ec 100644 --- a/ibis/common/egraph.py +++ b/ibis/common/egraph.py @@ -120,6 +120,19 @@ def __eq__(self, other: object) -> bool: return NotImplemented return self._parents == other._parents + def copy(self) -> DisjointSet: + """Make a copy of the disjoint set. + + Returns + ------- + copy: + A copy of the disjoint set. + """ + ds = DisjointSet() + ds._parents = self._parents.copy() + ds._classes = self._classes.copy() + return ds + def add(self, id: K) -> K: """Add a new id to the disjoint set. diff --git a/ibis/common/tests/test_egraph.py b/ibis/common/tests/test_egraph.py index b31c527bac17..98fcd04bf1ce 100644 --- a/ibis/common/tests/test_egraph.py +++ b/ibis/common/tests/test_egraph.py @@ -83,6 +83,13 @@ def test_disjoint_set(): ds._classes[1] = {1} ds.verify() + # test copying the disjoint set + ds2 = ds.copy() + assert ds == ds2 + assert ds is not ds2 + ds2.add(5) + assert ds != ds2 + class PatternNamespace: def __init__(self, module): diff --git a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt index 6f7009dc8056..a8504725070a 100644 --- a/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt +++ b/ibis/expr/tests/snapshots/test_format/test_table_count_expr/join_repr.txt @@ -12,7 +12,6 @@ r2 := JoinChain[r0] values: a: r0.a b: r0.b - a_right: r1.a b_right: r1.b CountStar(): CountStar(r2) \ No newline at end of file diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index cdd4cd64b049..24c9661d6e6c 100644 --- 
a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -719,7 +719,6 @@ def test_join_predicate_dereferencing(): "foo_id_right": r2.foo_id, "value1": r2.value1, "value3": r2.value3, - "bar_id_right": r3.bar_id, "value2": r3.value2, }, ) @@ -941,7 +940,7 @@ def test_self_join(): rest=[ ops.JoinLink("inner", r2, [r1.key == r2.key]), ], - values={"key": r1.key, "key_right": r2.key}, + values={"key": r1.key}, ) assert t3.op() == expected @@ -951,11 +950,7 @@ def test_self_join(): ops.JoinLink("inner", r2, [r1.key == r2.key]), ops.JoinLink("inner", r3, [r1.key == r3.key]), ], - values={ - "key": r1.key, - "key_right": r2.key, - "key_right_right": r3.key_right, - }, + values={"key": r1.key}, ) assert t4.op() == expected @@ -1061,7 +1056,6 @@ def test_self_join_extensive(): values={ "a": r1.a, "b": r1.b, - "a_right": r2.a, "b_right": r2.b, }, ) @@ -1083,7 +1077,6 @@ def test_self_join_extensive(): values={ "a": r1.a, "b": r1.b, - "a_right": r2.a, "b_right": r2.b, }, ) @@ -1106,7 +1099,6 @@ def test_self_join_with_intermediate_selection(): values={ "b": r1.b, "a": r1.a, - "a_right": r2.a, "b_right": r2.b, }, ) @@ -1124,7 +1116,6 @@ def test_self_join_with_intermediate_selection(): values={ "a": r1.a, "b_right": r2.b, - "a_right": r3.a, "b": r3.b, }, ) @@ -1133,7 +1124,7 @@ def test_self_join_with_intermediate_selection(): # TODO(kszucs): this use case could be supported if `_get_column` gets # overridden to return underlying column reference, but that would mean # that `aa.a` returns with `a.a` instead of `aa.a` which breaks other - # things + # things; the other possible solution is to use 2way dereferencing # aa = a.join(a, [a.a == a.a]) # aaa = aa["a", "b_right"].join(a, [aa.a == a.a]) # a0 = a @@ -1356,3 +1347,126 @@ def test_join_with_compound_predicate(): }, ) assert expr.op() == expected + + +def test_inner_join_convenience(): + t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"}) + t2 = ibis.table(name="t2", schema={"a": "int64", 
"c": "string"}) + t3 = ibis.table(name="t3", schema={"a": "int64", "d": "string"}) + t4 = ibis.table(name="t4", schema={"a": "int64", "e": "string"}) + t5 = ibis.table(name="t5", schema={"a": "int64", "f": "string"}) + + first_join = t1.inner_join(t2, [t1.a == t2.a]) + with join_tables(t1, t2) as (r1, r2): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ], + values={ + "a": r1.a, + "b": r1.b, + "c": r2.c, + }, + ) + # finish to evaluate the collisions + result = first_join._finish().op() + assert result == expected + + # note that we are joining on r2.a which isn't among the values + second_join = first_join.inner_join(t3, [r2.a == t3.a]) + with join_tables(t1, t2, t3) as (r1, r2, r3): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ops.JoinLink("inner", r3, [r2.a == r3.a]), + ], + values={ + "a": r1.a, + "b": r1.b, + "c": r2.c, + "d": r3.d, + }, + ) + # finish to evaluate the collisions + result = second_join._finish().op() + assert result == expected + + third_join = second_join.left_join(t4, [r3.a == t4.a]) + with join_tables(t1, t2, t3, t4) as (r1, r2, r3, r4): + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ops.JoinLink("inner", r3, [r2.a == r3.a]), + ops.JoinLink("left", r4, [r3.a == r4.a]), + ], + values={ + "a": r1.a, + "b": r1.b, + "c": r2.c, + "d": r3.d, + "a_right": r4.a, + "e": r4.e, + }, + ) + # finish to evaluate the collisions + result = third_join._finish().op() + assert result == expected + + fourth_join = third_join.inner_join(t5, [r3.a == t5.a], rname="{name}_") + with join_tables(t1, t2, t3, t4, t5) as (r1, r2, r3, r4, r5): + # equality groups are being reset + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ops.JoinLink("inner", r3, [r2.a == r3.a]), + ops.JoinLink("left", r4, [r3.a == r4.a]), + ops.JoinLink("inner", r5, [r3.a == r5.a]), + ], + values={ + 
"a": r1.a, + "b": r1.b, + "c": r2.c, + "d": r3.d, + "a_right": r4.a, + "e": r4.e, + "f": r5.f, + }, + ) + # finish to evaluate the collisions + result = fourth_join._finish().op() + assert result == expected + + with pytest.raises(IntegrityError): + # equality groups are being reset, t5.a would be renamed to 'a_right' + # which already exists + third_join.inner_join(t5, [r4.a == t5.a])._finish() + + fifth_join = third_join.inner_join(t5, [r4.a == t5.a], rname="{name}_") + with join_tables(t1, t2, t3, t4, t5) as (r1, r2, r3, r4, r5): + # equality groups are being reset + expected = ops.JoinChain( + first=r1, + rest=[ + ops.JoinLink("inner", r2, [r1.a == r2.a]), + ops.JoinLink("inner", r3, [r2.a == r3.a]), + ops.JoinLink("left", r4, [r3.a == r4.a]), + ops.JoinLink("inner", r5, [r4.a == r5.a]), + ], + values={ + "a": r1.a, + "b": r1.b, + "c": r2.c, + "d": r3.d, + "a_right": r4.a, + "e": r4.e, + "a_": r5.a, + "f": r5.f, + }, + ) + # finish to evaluate the collisions + result = fifth_join._finish().op() + assert result == expected diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py index 000a2ecdfb2c..ab6c587b8a33 100644 --- a/ibis/expr/types/joins.py +++ b/ibis/expr/types/joins.py @@ -2,7 +2,7 @@ import functools from public import public -from typing import Any, Optional +from typing import Any, Optional, TYPE_CHECKING from collections.abc import Iterator, Mapping import ibis @@ -21,34 +21,92 @@ ) from ibis.expr.operations.relations import JoinKind from ibis.expr.rewrites import peel_join_field +from ibis.common.egraph import DisjointSet +if TYPE_CHECKING: + from collections.abc import Sequence -def disambiguate_fields(how, left_fields, right_fields, lname, rname): + +def disambiguate_fields( + how, + predicates, + equalities, + left_fields, + right_fields, + left_template, + right_template, +): + """ + Resolve name collisions between the left and right tables. 
+ """ collisions = set() + left_template = left_template or "{name}" + right_template = right_template or "{name}" + + if how == "inner" and util.all_of(predicates, ops.Equals): + # for inner joins composed exclusively of equality predicates, we can + # avoid renaming columns with colliding names if their values are + # guaranteed to be equal due to the predicate + equalities = equalities.copy() + for pred in predicates: + if isinstance(pred.left, ops.Field) and isinstance(pred.right, ops.Field): + # disjoint sets are used to track the equality groups + equalities.add(pred.left) + equalities.add(pred.right) + equalities.union(pred.left, pred.right) if how in ("semi", "anti"): # discard the right fields per left semi and left anty join semantics - return left_fields, collisions - - lname = lname or "{name}" - rname = rname or "{name}" - overlap = left_fields.keys() & right_fields.keys() + return left_fields, collisions, equalities fields = {} for name, field in left_fields.items(): - if name in overlap: - name = lname.format(name=name) + if name in right_fields: + # there is an overlap between this field and a field from the right + try: + # check if the fields are equal due to equality predicates + are_equal = equalities.connected(field, right_fields[name]) + except KeyError: + are_equal = False + if not are_equal: + # there is a name collision and the fields are not equal, so + # rename the field from the left according to the provided + # template (which is the name itself by default) + name = left_template.format(name=name) + fields[name] = field + for name, field in right_fields.items(): - if name in overlap: - name = rname.format(name=name) - # only add if there is no collision + if name in left_fields: + # there is an overlap between this field and a field from the left + try: + # check if the fields are equal due to equality predicates + are_equal = equalities.connected(field, left_fields[name]) + except KeyError: + are_equal = False + + if are_equal: + # 
even though there is a name collision, the fields are equal + # due to equality predicates, so we can safely discard the + # field from the right + continue + else: + # there is a name collision and the fields are not equal, so + # rename the field from the right according to the provided + # template + name = right_template.format(name=name) + if name in fields: + # we can still have collisions after multiple joins, or a wrongly + # chosen template, so we need to track the collisions collisions.add(name) else: + # the field name does not collide with any field from the left + # and not occupied by any field from the right, so add it to the + # fields mapping fields[name] = field - return fields, collisions + return fields, collisions, equalities def dereference_mapping_left(chain): @@ -81,23 +139,60 @@ def dereference_sides(left, right, deref_left, deref_right): return left, right -def dereference_comparison(pred, deref_left, deref_right): - left, right = dereference_sides(pred.left, pred.right, deref_left, deref_right) - return pred.copy(left=left, right=right) - - def dereference_value(pred, deref_left, deref_right): deref_both = {**deref_left, **deref_right} if isinstance(pred, ops.Comparison) and pred.left.relations == pred.right.relations: - return dereference_comparison(pred, deref_left, deref_right) + left, right = dereference_sides(pred.left, pred.right, deref_left, deref_right) + return pred.copy(left=left, right=right) else: return pred.replace(deref_both, filter=ops.Value) def prepare_predicates( - left, right, predicates, deref_left, deref_right, comparison=ops.Equals + left: ops.JoinChain, + right: ops.Relation, + predicates: Sequence[Any], + comparison: type[ops.Comparison] = ops.Equals, ): - """Bind and dereference predicates to the left and right tables.""" + """Bind and dereference predicates to the left and right tables. + + The responsibility of this function is twofold: + 1. 
Convert the various input values to valid predicates, including binding. + 2. Dereference the predicates to one of the ops.JoinTable(s) in the join chain + or the new JoinTable wrapping the right table. JoinTable(s) are used to + ensure that all join participants are unique, even if the same table is + joined multiple times. + + Since join predicates can be ambiguous sometimes, we do the two steps above + at the same time so that we have more contextual information to resolve + ambiguities. + + Possible inputs for the predicates: + 1. A python boolean literal, which is converted to a literal expression + 2. A boolean `Value` expression, which gets flattened and dereferenced. + If there are comparison expressions where both sides depend on the same + relation, then the left side is dereferenced to one of the join tables + already part of the join chain, while the right side is dereferenced to + the new join table wrapping the right table. + 3. A `Deferred` expression, which gets resolved on the left table and then + the same path is followed as for `Value` expressions. + 4. A pair of expression-like objects, which are getting bound to the left + and right tables respectively using the robust `bind` function handling + several cases, including `Deferred` expressions, `Selector`s, literals, + etc. Then the left are dereferenced to the join chain whereas the right + to the new join table wrapping the right table.
+ + Parameters + ---------- + left + The left table + right + The right table + predicates + Predicates to bind and dereference, see the possible values above + """ + deref_left = dereference_mapping_left(left) + deref_right = dereference_mapping_right(right) left, right = left.to_expr(), right.to_expr() for pred in util.promote_list(predicates): @@ -142,9 +237,9 @@ def wrapper(self, *args, **kwargs): @public class Join(Table): - __slots__ = ("_collisions",) + __slots__ = ("_collisions", "_equalities") - def __init__(self, arg, collisions=None): + def __init__(self, arg, collisions=(), equalities=()): assert isinstance(arg, ops.Node) if not isinstance(arg, ops.JoinChain): # coerce the input node to a join chain operation by first wrapping @@ -154,7 +249,15 @@ def __init__(self, arg, collisions=None): arg = ops.JoinTable(arg, index=0) arg = ops.JoinChain(arg, rest=(), values=arg.fields) super().__init__(arg) + # the collisions and equalities are used to track the name collisions + # and the equality groups join fields based on equality predicates; + # these must be tracked in the join expression because the join chain + # operation doesn't hold any information about `lname` and `rname` + # parameters passed to the join methods and used to disambiguate field + # names; the collisions are used to raise an error if there are any + # name collisions after the join chain is finished object.__setattr__(self, "_collisions", collisions or set()) + object.__setattr__(self, "_equalities", equalities or DisjointSet()) def _finish(self) -> Table: """Construct a valid table expression from this join expression.""" @@ -190,19 +293,9 @@ def join( # noqa: D102 left = self.op() right = ops.JoinTable(right, index=left.length) - subs_left = dereference_mapping_left(left) - subs_right = dereference_mapping_right(right) # bind and dereference the predicates - preds = list( - prepare_predicates( - left, - right, - predicates, - deref_left=subs_left, - deref_right=subs_right, - ) - ) + 
preds = list(prepare_predicates(left, right, predicates)) if not preds and how != "cross": # if there are no predicates, default to every row matching unless # the join is a cross join, because a cross join already has this @@ -213,8 +306,14 @@ def join( # noqa: D102 # effort to avoid collisions, but does not raise if there are any # if no disambiaution happens using a final .select() call, then # the finish() method will raise due to the name collisions - values, collisions = disambiguate_fields( - how, left.values, right.fields, lname, rname + values, collisions, equalities = disambiguate_fields( + how=how, + predicates=preds, + equalities=self._equalities, + left_fields=left.values, + right_fields=right.fields, + left_template=lname, + right_template=rname, ) # construct a new join link and add it to the join chain @@ -222,7 +321,7 @@ def join( # noqa: D102 left = left.copy(rest=left.rest + (link,), values=values) # return with a new JoinExpr wrapping the new join chain - return self.__class__(left, collisions=collisions) + return self.__class__(left, collisions=collisions, equalities=equalities) @functools.wraps(Table.asof_join) def asof_join( # noqa: D102 @@ -280,30 +379,20 @@ def asof_join( # noqa: D102 left = self.op() right = ops.JoinTable(right, index=left.length) - subs_left = dereference_mapping_left(left) - subs_right = dereference_mapping_right(right) # TODO(kszucs): add extra validation for `on` with clear error messages - (on,) = prepare_predicates( - left, - right, - [on], - deref_left=subs_left, - deref_right=subs_right, - comparison=ops.GreaterEqual, - ) - predicates = prepare_predicates( - left, - right, - predicates, - deref_left=subs_left, - deref_right=subs_right, - comparison=ops.Equals, - ) - preds = [on, *predicates] - - values, collisions = disambiguate_fields( - "asof", left.values, right.fields, lname, rname + (on,) = prepare_predicates(left, right, [on], comparison=ops.GreaterEqual) + preds = prepare_predicates(left, right, predicates, 
comparison=ops.Equals) + preds = [on, *preds] + + values, collisions, equalities = disambiguate_fields( + how="asof", + predicates=preds, + equalities=self._equalities, + left_fields=left.values, + right_fields=right.fields, + left_template=lname, + right_template=rname, ) # construct a new join link and add it to the join chain @@ -311,7 +400,7 @@ def asof_join( # noqa: D102 left = left.copy(rest=left.rest + (link,), values=values) # return with a new JoinExpr wrapping the new join chain - return self.__class__(left, collisions=collisions) + return self.__class__(left, collisions=collisions, equalities=equalities) @functools.wraps(Table.cross_join) def cross_join( # noqa: D102 diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index ede8a2aa1e54..d3ac796d3d3d 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -1101,7 +1101,7 @@ def test_self_join_no_view_convenience(table): expected_cols = list(table.columns) # TODO(kszucs): the inner join convenience to don't duplicate the # equivalent columns from the right table is not implemented yet - expected_cols.extend(f"{c}_right" for c in table.columns) # if c != "g") + expected_cols.extend(f"{c}_right" for c in table.columns if c != "g") assert result.columns == expected_cols @@ -1207,43 +1207,41 @@ def test_filter_join(): repr(filtered) -# TODO(kszucs): the inner join convenience to don't duplicate the equivalent -# columns from the right table is not implemented yet -# def test_inner_join_overlapping_column_names(): -# t1 = ibis.table([("foo", "string"), ("bar", "string"), ("value1", "double")]) -# t2 = ibis.table([("foo", "string"), ("bar", "string"), ("value2", "double")]) - -# joined = t1.join(t2, "foo") -# expected = t1.join(t2, t1.foo == t2.foo) -# assert_equal(joined, expected) -# assert joined.columns == ["foo", "bar", "value1", "bar_right", "value2"] - -# joined = t1.join(t2, ["foo", "bar"]) -# expected = t1.join(t2, [t1.foo == t2.foo, t1.bar == t2.bar]) 
-# assert_equal(joined, expected) -# assert joined.columns == ["foo", "bar", "value1", "value2"] - -# # Equality predicates don't have same name, need to rename -# joined = t1.join(t2, t1.foo == t2.bar) -# assert joined.columns == [ -# "foo", -# "bar", -# "value1", -# "foo_right", -# "bar_right", -# "value2", -# ] - -# # Not all predicates are equality, still need to rename -# joined = t1.join(t2, ["foo", t1.value1 < t2.value2]) -# assert joined.columns == [ -# "foo", -# "bar", -# "value1", -# "foo_right", -# "bar_right", -# "value2", -# ] +def test_inner_join_overlapping_column_names(): + t1 = ibis.table([("foo", "string"), ("bar", "string"), ("value1", "double")]) + t2 = ibis.table([("foo", "string"), ("bar", "string"), ("value2", "double")]) + + joined = t1.join(t2, "foo") + expected = t1.join(t2, t1.foo == t2.foo) + assert_equal(joined, expected) + assert joined.columns == ["foo", "bar", "value1", "bar_right", "value2"] + + joined = t1.join(t2, ["foo", "bar"]) + expected = t1.join(t2, [t1.foo == t2.foo, t1.bar == t2.bar]) + assert_equal(joined, expected) + assert joined.columns == ["foo", "bar", "value1", "value2"] + + # Equality predicates don't have same name, need to rename + joined = t1.join(t2, t1.foo == t2.bar) + assert joined.columns == [ + "foo", + "bar", + "value1", + "foo_right", + "bar_right", + "value2", + ] + + # Not all predicates are equality, still need to rename + joined = t1.join(t2, ["foo", t1.value1 < t2.value2]) + assert joined.columns == [ + "foo", + "bar", + "value1", + "foo_right", + "bar_right", + "value2", + ] @pytest.mark.parametrize( @@ -1275,7 +1273,6 @@ def test_join_key_alternatives(con, key_maker): "f": r1.f, "foo_id": r1.foo_id, "bar_id": r1.bar_id, - "foo_id_right": r2.foo_id, "value1": r2.value1, "value3": r2.value3, }, @@ -1364,9 +1361,6 @@ def test_unravel_compound_equijoin(table): "key2": r1.key2, "key3": r1.key3, "value1": r1.value1, - "key1_right": r2.key1, - "key2_right": r2.key2, - "key3_right": r2.key3, "value2": 
r2.value2, }, )