Skip to content

Commit

Permalink
feat(ir): more flexible dereferencing support for join right hand side
Browse files (browse the repository at this point in the history)
  • Loading branch information
kszucs committed Apr 22, 2024
1 parent 33286f2 commit a11434b
Show file tree
Hide file tree
Showing 38 changed files with 469 additions and 361 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@ SELECT
`t3`.`val`,
`t3`.`XYZ`
FROM `t1` AS `t3`
INNER JOIN `t1` AS `t5`
INNER JOIN `t1` AS `t4`
ON TRUE
3 changes: 2 additions & 1 deletion ibis/backends/bigquery/tests/unit/test_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,8 @@ class MockBackend(ibis.backends.bigquery.Backend):
table = ops.SQLQueryResult("select * from t", schema, ibis_client).to_expr()
for _ in range(num_joins): # noqa: F402
table = table.mutate(dummy=ibis.literal(""))
table = table.left_join(table, ["dummy"])[[table]]
table_ = table.view()
table = table.left_join(table_, ["dummy"])[[table_]]

start = time.time()
table.compile()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@ SELECT
"t1"."year",
"t1"."month"
FROM "functional_alltypes" AS "t1"
INNER JOIN "functional_alltypes" AS "t3"
ON "t1"."id" = "t3"."id"
INNER JOIN "functional_alltypes" AS "t2"
ON "t1"."id" = "t2"."id"
Original file line number Diff line number Diff line change
@@ -1 +1 @@
WITH `t9` AS (SELECT EXTRACT(year FROM `t8`.`odate`) AS `year`, COUNT(*) AS `CountStar()` FROM (SELECT `t6`.`c_custkey`, `t6`.`c_name`, `t6`.`c_address`, `t6`.`c_nationkey`, `t6`.`c_phone`, `t6`.`c_acctbal`, `t6`.`c_mktsegment`, `t6`.`c_comment`, `t4`.`r_name` AS `region`, `t7`.`o_totalprice`, CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` FROM `tpch_region` AS `t4` INNER JOIN `tpch_nation` AS `t5` ON `t4`.`r_regionkey` = `t5`.`n_regionkey` INNER JOIN `tpch_customer` AS `t6` ON `t6`.`c_nationkey` = `t5`.`n_nationkey` INNER JOIN `tpch_orders` AS `t7` ON `t7`.`o_custkey` = `t6`.`c_custkey`) AS `t8` GROUP BY 1) SELECT `t11`.`year`, `t11`.`CountStar()` AS `pre_count`, `t13`.`CountStar()` AS `post_count` FROM `t9` AS `t11` INNER JOIN `t9` AS `t13` ON `t11`.`year` = `t13`.`year`
WITH `t9` AS (SELECT EXTRACT(year FROM `t8`.`odate`) AS `year`, COUNT(*) AS `CountStar()` FROM (SELECT `t6`.`c_custkey`, `t6`.`c_name`, `t6`.`c_address`, `t6`.`c_nationkey`, `t6`.`c_phone`, `t6`.`c_acctbal`, `t6`.`c_mktsegment`, `t6`.`c_comment`, `t4`.`r_name` AS `region`, `t7`.`o_totalprice`, CAST(`t7`.`o_orderdate` AS TIMESTAMP) AS `odate` FROM `tpch_region` AS `t4` INNER JOIN `tpch_nation` AS `t5` ON `t4`.`r_regionkey` = `t5`.`n_regionkey` INNER JOIN `tpch_customer` AS `t6` ON `t6`.`c_nationkey` = `t5`.`n_nationkey` INNER JOIN `tpch_orders` AS `t7` ON `t7`.`o_custkey` = `t6`.`c_custkey`) AS `t8` GROUP BY 1) SELECT `t11`.`year`, `t11`.`CountStar()` AS `pre_count`, `t12`.`CountStar()` AS `post_count` FROM `t9` AS `t11` INNER JOIN `t9` AS `t12` ON `t11`.`year` = `t12`.`year`
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ SELECT
`t1`.`a`,
`t1`.`b`
FROM `t` AS `t1`
INNER JOIN `t` AS `t3`
ON `t1`.`a` = `t3`.`a`
INNER JOIN `t` AS `t2`
ON `t1`.`a` = `t2`.`a`
AND (
(
`t1`.`a` <> `t3`.`b`
`t1`.`a` <> `t2`.`b`
) OR (
`t1`.`b` <> `t3`.`a`
`t1`.`b` <> `t2`.`a`
)
)
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@ SELECT
`t1`.`a`,
`t1`.`b`
FROM `t` AS `t1`
INNER JOIN `t` AS `t3`
ON `t1`.`a` = `t3`.`a`
INNER JOIN `t` AS `t2`
ON `t1`.`a` = `t2`.`a`
AND (
(
`t1`.`a` <> `t3`.`b` OR `t1`.`b` <> `t3`.`a`
`t1`.`a` <> `t2`.`b` OR `t1`.`b` <> `t2`.`a`
)
AND NOT (
`t1`.`a` <> `t3`.`b` AND `t1`.`b` <> `t3`.`a`
`t1`.`a` <> `t2`.`b` AND `t1`.`b` <> `t2`.`a`
)
)
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ SELECT
`t3`.`year`,
`t3`.`month`
FROM `t1` AS `t3`
INNER JOIN `t1` AS `t5`
INNER JOIN `t1` AS `t4`
ON TRUE
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ WITH `t1` AS (
1
)
SELECT
`t5`.`uuid`,
`t3`.`uuid`,
`t3`.`CountStar(t)`
FROM (
SELECT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ WITH `t1` AS (
1
)
SELECT
`t7`.`uuid`,
`t4`.`uuid`,
`t4`.`CountStar(t)`,
`t5`.`last_visit`
FROM (
Expand All @@ -28,4 +28,4 @@ LEFT OUTER JOIN (
GROUP BY
1
) AS `t5`
ON `t7`.`uuid` = `t5`.`uuid`
ON `t4`.`uuid` = `t5`.`uuid`
2 changes: 1 addition & 1 deletion ibis/backends/pandas/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ def visit(cls, op: ops.DummyTable, values):
return df

@classmethod
def visit(cls, op: ops.SelfReference | ops.JoinTable, parent, **kwargs):
def visit(cls, op: ops.Reference, parent, **kwargs):
return parent

@classmethod
Expand Down
9 changes: 2 additions & 7 deletions ibis/backends/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1239,13 +1239,8 @@ def execute_view(op, *, ctx: pl.SQLContext, **kw):
return child


@translate.register(ops.SelfReference)
def execute_self_reference(op, **kw):
return translate(op.parent, **kw)


@translate.register(ops.JoinTable)
def execute_join_table(op, **kw):
@translate.register(ops.Reference)
def execute_reference(op, **kw):
return translate(op.parent, **kw)


Expand Down
5 changes: 2 additions & 3 deletions ibis/backends/sql/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1158,6 +1158,8 @@ def visit_DatabaseTable(
def visit_SelfReference(self, op, *, parent, identifier):
return parent

visit_JoinReference = visit_SelfReference

def visit_JoinChain(self, op, *, first, rest, values):
result = sg.select(*self._cleanup_names(values), copy=False).from_(
first, copy=False
Expand Down Expand Up @@ -1388,9 +1390,6 @@ def visit_SQLStringView(self, op, *, query: str, child, schema):
def visit_SQLQueryResult(self, op, *, query, schema, source):
return sg.parse_one(query, dialect=self.dialect).subquery(copy=False)

def visit_JoinTable(self, op, *, parent, index):
return parent

def visit_RegexExtract(self, op, *, arg, pattern, index):
return self.f.regexp_extract(arg, pattern, index, dialect=self.dialect)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,5 @@ SELECT
"t3"."year",
"t3"."month"
FROM "t1" AS "t3"
INNER JOIN "t1" AS "t5"
INNER JOIN "t1" AS "t4"
ON TRUE
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,20 @@ FROM (
"t1"."timestamp_col",
"t1"."year",
"t1"."month",
"t3"."id" AS "id_right",
"t3"."bool_col" AS "bool_col_right",
"t3"."tinyint_col" AS "tinyint_col_right",
"t3"."smallint_col" AS "smallint_col_right",
"t3"."int_col" AS "int_col_right",
"t3"."bigint_col" AS "bigint_col_right",
"t3"."float_col" AS "float_col_right",
"t3"."double_col" AS "double_col_right",
"t3"."date_string_col" AS "date_string_col_right",
"t3"."string_col" AS "string_col_right",
"t3"."timestamp_col" AS "timestamp_col_right",
"t3"."year" AS "year_right",
"t3"."month" AS "month_right"
"t2"."id" AS "id_right",
"t2"."bool_col" AS "bool_col_right",
"t2"."tinyint_col" AS "tinyint_col_right",
"t2"."smallint_col" AS "smallint_col_right",
"t2"."int_col" AS "int_col_right",
"t2"."bigint_col" AS "bigint_col_right",
"t2"."float_col" AS "float_col_right",
"t2"."double_col" AS "double_col_right",
"t2"."date_string_col" AS "date_string_col_right",
"t2"."string_col" AS "string_col_right",
"t2"."timestamp_col" AS "timestamp_col_right",
"t2"."year" AS "year_right",
"t2"."month" AS "month_right"
FROM "functional_alltypes" AS "t1"
INNER JOIN "functional_alltypes" AS "t3"
ON "t1"."tinyint_col" < EXTRACT(minute FROM "t3"."timestamp_col")
) AS "t4"
INNER JOIN "functional_alltypes" AS "t2"
ON "t1"."tinyint_col" < EXTRACT(minute FROM "t2"."timestamp_col")
) AS "t3"
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
SELECT
"t2"."r_name",
"t6"."n_name"
"t5"."n_name"
FROM "tpch_region" AS "t2"
INNER JOIN "tpch_nation" AS "t3"
ON "t2"."r_regionkey" = "t3"."n_regionkey"
Expand All @@ -16,5 +16,5 @@ INNER JOIN (
FROM "tpch_region" AS "t2"
INNER JOIN "tpch_nation" AS "t3"
ON "t2"."r_regionkey" = "t3"."n_regionkey"
) AS "t6"
ON "t2"."r_regionkey" = "t6"."r_regionkey"
) AS "t5"
ON "t2"."r_regionkey" = "t5"."r_regionkey"
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,25 @@ WITH "t1" AS (
GROUP BY
1,
2
), "t6" AS (
), "t5" AS (
SELECT
"t3"."a",
"t3"."g",
"t3"."metric"
FROM "t1" AS "t3"
INNER JOIN "t1" AS "t5"
ON "t3"."g" = "t5"."g"
INNER JOIN "t1" AS "t4"
ON "t3"."g" = "t4"."g"
)
SELECT
"t9"."a",
"t9"."g",
"t9"."metric"
"t8"."a",
"t8"."g",
"t8"."metric"
FROM (
SELECT
*
FROM "t6" AS "t7"
FROM "t5" AS "t6"
UNION ALL
SELECT
*
FROM "t6" AS "t8"
) AS "t9"
FROM "t5" AS "t7"
) AS "t8"
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,21 @@ WITH "t1" AS (
3
)
SELECT
"t6"."g",
MAX("t6"."total" - "t6"."total_right") AS "metric"
"t5"."g",
MAX("t5"."total" - "t5"."total_right") AS "metric"
FROM (
SELECT
"t3"."g",
"t3"."a",
"t3"."b",
"t3"."total",
"t5"."g" AS "g_right",
"t5"."a" AS "a_right",
"t5"."b" AS "b_right",
"t5"."total" AS "total_right"
"t4"."g" AS "g_right",
"t4"."a" AS "a_right",
"t4"."b" AS "b_right",
"t4"."total" AS "total_right"
FROM "t1" AS "t3"
INNER JOIN "t1" AS "t5"
ON "t3"."a" = "t5"."b"
) AS "t6"
INNER JOIN "t1" AS "t4"
ON "t3"."a" = "t4"."b"
) AS "t5"
GROUP BY
1
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ WITH "t9" AS (
SELECT
"t11"."region",
"t11"."year",
"t11"."total" - "t13"."total" AS "yoy_change"
"t11"."total" - "t12"."total" AS "yoy_change"
FROM "t9" AS "t11"
INNER JOIN "t9" AS "t13"
INNER JOIN "t9" AS "t12"
ON "t11"."year" = (
"t13"."year" - CAST(1 AS TINYINT)
"t12"."year" - CAST(1 AS TINYINT)
)
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ INNER JOIN (
FROM "alltypes" AS "t1"
GROUP BY
1
) AS "t6"
ON "t3"."g" = "t6"."g"
) AS "t5"
ON "t3"."g" = "t5"."g"
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ SELECT
"t1"."foo_id",
"t1"."bar_id"
FROM "star1" AS "t1"
INNER JOIN "star1" AS "t3"
ON "t1"."foo_id" = "t3"."bar_id"
INNER JOIN "star1" AS "t2"
ON "t1"."foo_id" = "t2"."bar_id"
Loading

0 comments on commit a11434b

Please sign in to comment.