From 8fdeb27f3155077273a0dec1dca1e2b81b5adcd9 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 29 Dec 2023 12:17:11 -0500 Subject: [PATCH 1/3] feat(trino): port to sqlglot --- .github/workflows/ibis-backends.yml | 37 +- ibis/backends/base/sqlglot/datatypes.py | 64 ++ ibis/backends/base/sqlglot/rewrites.py | 47 ++ ibis/backends/conftest.py | 2 +- ibis/backends/tests/errors.py | 5 + .../test_default_limit/trino/out.sql | 5 + .../test_disable_query_limit/trino/out.sql | 5 + .../trino/out.sql | 3 + .../test_respect_set_limit/trino/out.sql | 10 + .../test_group_by_has_index/trino/out.sql | 8 +- .../test_sql/test_isin_bug/trino/out.sql | 18 +- .../test_union_aliasing/trino/out.sql | 191 ++++-- ibis/backends/tests/test_aggregation.py | 18 +- ibis/backends/tests/test_array.py | 81 +-- ibis/backends/tests/test_asof_join.py | 4 +- ibis/backends/tests/test_binary.py | 9 +- ibis/backends/tests/test_client.py | 4 +- ibis/backends/tests/test_export.py | 6 +- ibis/backends/tests/test_generic.py | 51 +- ibis/backends/tests/test_numeric.py | 40 +- ibis/backends/tests/test_struct.py | 11 +- ibis/backends/tests/test_temporal.py | 3 +- ibis/backends/tests/test_window.py | 2 + .../test_h01/test_tpc_h01/trino/h01.sql | 87 ++- .../test_h02/test_tpc_h02/trino/h02.sql | 264 +++++--- .../test_h03/test_tpc_h03/trino/h03.sql | 170 ++++- .../test_h04/test_tpc_h04/trino/h04.sql | 66 +- .../test_h05/test_tpc_h05/trino/h05.sql | 212 ++++++- .../test_h06/test_tpc_h06/trino/h06.sql | 33 +- .../test_h07/test_tpc_h07/trino/h07.sql | 172 +++-- .../test_h08/test_tpc_h08/trino/h08.sql | 188 ++++-- .../test_h09/test_tpc_h09/trino/h09.sql | 142 ++++- .../test_h10/test_tpc_h10/trino/h10.sql | 207 ++++-- .../test_h11/test_tpc_h11/trino/h11.sql | 184 +++++- .../test_h12/test_tpc_h12/trino/h12.sql | 125 +++- .../test_h13/test_tpc_h13/trino/h13.sql | 81 ++- .../test_h14/test_tpc_h14/trino/h14.sql | 115 +++- .../test_h15/test_tpc_h15/trino/h15.sql | 173 +++-- .../test_h16/test_tpc_h16/trino/h16.sql | 116 +++- .../test_h17/test_tpc_h17/trino/h17.sql | 134 +++- .../test_h18/test_tpc_h18/trino/h18.sql | 201 ++++-- .../test_h19/test_tpc_h19/trino/h19.sql | 205 +++++- .../test_h20/test_tpc_h20/trino/h20.sql | 152 +++-- .../test_h21/test_tpc_h21/trino/h21.sql | 196 ++++-- .../test_h22/test_tpc_h22/trino/h22.sql | 109 ++-- ibis/backends/tests/tpch/test_h15.py | 11 +- ibis/backends/trino/__init__.py | 484 +++++++++----- ibis/backends/trino/compiler.py | 565 +++++++++++++++-- ibis/backends/trino/converter.py | 24 + ibis/backends/trino/datatypes.py | 149 ----- ibis/backends/trino/registry.py | 600 ------------------ ibis/backends/trino/tests/conftest.py | 47 +- ibis/backends/trino/tests/test_client.py | 44 +- ibis/backends/trino/tests/test_datatypes.py | 2 +- ibis/expr/rewrites.py | 1 + poetry.lock | 5 +- pyproject.toml | 4 +- requirements-dev.txt | 2 +- 58 files changed, 3879 insertions(+), 2015 deletions(-) create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_default_limit/trino/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/trino/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/trino/out.sql create mode 100644 ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/trino/out.sql create mode 100644 ibis/backends/trino/converter.py delete mode 100644 ibis/backends/trino/datatypes.py delete mode 100644 ibis/backends/trino/registry.py diff --git 
a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index 71b7d835b22a..a2f2d177bd19 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -142,13 +142,12 @@ jobs: # - freetds-dev # - unixodbc-dev # - tdsodbc - # - name: trino - # title: Trino - # extras: - # - trino - # - postgres - # services: - # - trino + - name: trino + title: Trino + extras: + - trino + services: + - trino # - name: druid # title: Druid # extras: @@ -248,15 +247,14 @@ jobs: # - freetds-dev # - unixodbc-dev # - tdsodbc - # - os: windows-latest - # backend: - # name: trino - # title: Trino - # services: - # - trino - # extras: - # - trino - # - postgres + - os: windows-latest + backend: + name: trino + title: Trino + services: + - trino + extras: + - trino # - os: windows-latest # backend: # name: druid @@ -685,13 +683,6 @@ jobs: # title: SQLite # extras: # - sqlite - # - name: trino - # title: Trino - # services: - # - trino - # extras: - # - trino - # - postgres # - name: oracle # title: Oracle # serial: true diff --git a/ibis/backends/base/sqlglot/datatypes.py b/ibis/backends/base/sqlglot/datatypes.py index 9c4a7fe531df..b3a9b643c1ed 100644 --- a/ibis/backends/base/sqlglot/datatypes.py +++ b/ibis/backends/base/sqlglot/datatypes.py @@ -447,6 +447,70 @@ class TrinoType(SqlglotType): } ) + @classmethod + def _from_ibis_Interval(cls, dtype: dt.Interval) -> sge.DataType: + assert dtype.unit is not None, "interval unit cannot be None" + if (short := dtype.unit.short) in ("Y", "Q", "M"): + return sge.DataType( + this=typecode.INTERVAL, + expressions=[ + sge.IntervalSpan( + this=sge.Var(this="YEAR"), expression=sge.Var(this="MONTH") + ) + ], + ) + elif short in ("D", "h", "m", "s", "ms", "us", "ns"): + return sge.DataType( + this=typecode.INTERVAL, + expressions=[ + sge.IntervalSpan( + this=sge.Var(this="DAY"), expression=sge.Var(this="SECOND") + ) + ], + ) + else: + raise NotImplementedError( + f"Trino does not support {dtype.unit.name} intervals" + ) + + @classmethod + def _from_sqlglot_UBIGINT(cls): + return dt.Decimal(precision=19, scale=0, nullable=cls.default_nullable) + + @classmethod + def _from_ibis_UInt64(cls, dtype): + return sge.DataType( + this=typecode.DECIMAL, + expressions=[ + sge.DataTypeParam(this=sge.convert(19)), + sge.DataTypeParam(this=sge.convert(0)), + ], + ) + + @classmethod + def _from_sqlglot_UINT(cls): + return dt.Int64(nullable=cls.default_nullable) + + @classmethod + def _from_ibis_UInt32(cls, dtype): + return sge.DataType(this=typecode.BIGINT) + + @classmethod + def _from_sqlglot_USMALLINT(cls): + return dt.Int32(nullable=cls.default_nullable) + + @classmethod + def _from_ibis_UInt16(cls, dtype): + return sge.DataType(this=typecode.INT) + + @classmethod + def _from_sqlglot_UTINYINT(cls): + return dt.Int16(nullable=cls.default_nullable) + + @classmethod + def _from_ibis_UInt8(cls, dtype): + return sge.DataType(this=typecode.SMALLINT) + class DruidType(SqlglotType): # druid doesn't have a sophisticated type system and hive is close enough diff --git a/ibis/backends/base/sqlglot/rewrites.py b/ibis/backends/base/sqlglot/rewrites.py index 4b8341329980..522380d9111e 100644 --- a/ibis/backends/base/sqlglot/rewrites.py +++ b/ibis/backends/base/sqlglot/rewrites.py @@ -8,16 +8,21 @@ import toolz from public import public +import ibis.common.exceptions as com import ibis.expr.datashape as ds import ibis.expr.datatypes as dt import ibis.expr.operations as ops from ibis.common.annotations import attribute from ibis.common.collections import 
FrozenDict # noqa: TCH001 +from ibis.common.deferred import var from ibis.common.patterns import Object, replace from ibis.common.typing import VarTuple # noqa: TCH001 from ibis.expr.rewrites import p from ibis.expr.schema import Schema +x = var("x") +y = var("y") + @public class Select(ops.Relation): @@ -140,3 +145,45 @@ def sqlize(node): ) step2 = step1.replace(merge_select_select) return step2 + + +@replace(p.WindowFunction(p.First(x, y))) +def rewrite_first_to_first_value(_, x, y): + """Rewrite Ibis's first to first_value when used in a window function.""" + if y is not None: + raise com.UnsupportedOperationError( + "`first` with `where` is unsupported in a window function" + ) + return _.copy(func=ops.FirstValue(x)) + + +@replace(p.WindowFunction(p.Last(x, y))) +def rewrite_last_to_last_value(_, x, y): + """Rewrite Ibis's last to last_value when used in a window function.""" + if y is not None: + raise com.UnsupportedOperationError( + "`last` with `where` is unsupported in a window function" + ) + return _.copy(func=ops.LastValue(x)) + + +@replace(p.WindowFunction(frame=y @ p.WindowFrame(order_by=()))) +def rewrite_empty_order_by_window(_, y): + import ibis + + return _.copy(frame=y.copy(order_by=(ibis.NA,))) + + +@replace(p.WindowFunction(p.RowNumber | p.NTile, y)) +def exclude_unsupported_window_frame_from_row_number(_, y): + return ops.Subtract(_.copy(frame=y.copy(start=None, end=None)), 1) + + +@replace( + p.WindowFunction( + p.Lag | p.Lead | p.PercentRank | p.CumeDist | p.Any | p.All, + y @ p.WindowFrame(start=None), + ) +) +def exclude_unsupported_window_frame_from_ops(_, y): + return _.copy(frame=y.copy(start=None, end=0, order_by=y.order_by or (ops.NULL,))) diff --git a/ibis/backends/conftest.py b/ibis/backends/conftest.py index fe37b82e3526..a0cede0b8588 100644 --- a/ibis/backends/conftest.py +++ b/ibis/backends/conftest.py @@ -537,7 +537,7 @@ def ddl_con(ddl_backend): @pytest.fixture( params=_get_backends_to_test( - keep=("exasol", "mssql", "mysql", "oracle", "postgres", "sqlite", "trino") + keep=("exasol", "mssql", "mysql", "oracle", "postgres", "sqlite") ), scope="session", ) diff --git a/ibis/backends/tests/errors.py b/ibis/backends/tests/errors.py index cc4f2dc55791..ab76ae12a0be 100644 --- a/ibis/backends/tests/errors.py +++ b/ibis/backends/tests/errors.py @@ -73,3 +73,8 @@ from snowflake.connector.errors import ProgrammingError as SnowflakeProgrammingError except ImportError: SnowflakeProgrammingError = None + +try: + from trino.exceptions import TrinoUserError +except ImportError: + TrinoUserError = None diff --git a/ibis/backends/tests/snapshots/test_interactive/test_default_limit/trino/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/trino/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_default_limit/trino/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/trino/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/trino/out.sql new file mode 100644 index 000000000000..b309cd65374d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_disable_query_limit/trino/out.sql @@ -0,0 +1,5 @@ +SELECT + "t0"."id", + "t0"."bool_col" +FROM "functional_alltypes" AS "t0" +LIMIT 11 \ No newline at end of file diff --git 
a/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/trino/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/trino/out.sql new file mode 100644 index 000000000000..6bd0ba8c995d --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_interactive_execute_on_repr/trino/out.sql @@ -0,0 +1,3 @@ +SELECT + SUM("t0"."bigint_col") AS "Sum(bigint_col)" +FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/trino/out.sql b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/trino/out.sql new file mode 100644 index 000000000000..97338646649f --- /dev/null +++ b/ibis/backends/tests/snapshots/test_interactive/test_respect_set_limit/trino/out.sql @@ -0,0 +1,10 @@ +SELECT + * +FROM ( + SELECT + "t0"."id", + "t0"."bool_col" + FROM "functional_alltypes" AS "t0" + LIMIT 10 +) AS "t2" +LIMIT 11 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/trino/out.sql b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/trino/out.sql index fc16f2428d16..d3969647c9ea 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/trino/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_group_by_has_index/trino/out.sql @@ -1,5 +1,5 @@ SELECT - CASE t0.continent + CASE "t0"."continent" WHEN 'NA' THEN 'North America' WHEN 'SA' @@ -15,8 +15,8 @@ SELECT WHEN 'AN' THEN 'Antarctica' ELSE 'Unknown continent' - END AS cont, - SUM(t0.population) AS total_pop -FROM countries AS t0 + END AS "cont", + SUM("t0"."population") AS "total_pop" +FROM "countries" AS "t0" GROUP BY 1 \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/trino/out.sql b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/trino/out.sql index 3f66295a7f5a..c1611d8cecc3 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_isin_bug/trino/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_isin_bug/trino/out.sql @@ -1,13 +1,9 @@ SELECT - t0.x IN ( + "t0"."x" IN ( SELECT - t1.x - FROM ( - SELECT - t0.x AS x - FROM t AS t0 - WHERE - t0.x > 2 - ) AS t1 - ) AS "InColumn(x, x)" -FROM t AS t0 \ No newline at end of file + "t0"."x" + FROM "t" AS "t0" + WHERE + "t0"."x" > 2 + ) AS "InSubquery(x)" +FROM "t" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql index beed99d33761..f20ebad9894f 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_union_aliasing/trino/out.sql @@ -1,75 +1,130 @@ -WITH t0 AS ( - SELECT - t7.field_of_study AS field_of_study, - CAST(ROW(anon_2.years, anon_2.degrees) AS ROW(years VARCHAR, degrees BIGINT)) AS __pivoted__ - FROM humanities AS t7 - JOIN UNNEST(ARRAY[CAST(ROW('1970-71', t7."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', t7."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', t7."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', t7."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', t7."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', t7."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', t7."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', 
t7."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', t7."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', t7."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', t7."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', t7."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', t7."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', t7."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', t7."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', t7."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', t7."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', t7."2019-20") AS ROW(years VARCHAR, degrees BIGINT))]) AS anon_2(years, degrees) - ON TRUE -), t1 AS ( - SELECT - t0.field_of_study AS field_of_study, - t0.__pivoted__.years AS years, - t0.__pivoted__.degrees AS degrees - FROM t0 -), t2 AS ( - SELECT - t1.field_of_study AS field_of_study, - t1.years AS years, - t1.degrees AS degrees, - FIRST_VALUE(t1.degrees) OVER (PARTITION BY t1.field_of_study ORDER BY t1.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS earliest_degrees, - LAST_VALUE(t1.degrees) OVER (PARTITION BY t1.field_of_study ORDER BY t1.years ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS latest_degrees - FROM t1 -), t3 AS ( - SELECT - t2.field_of_study AS field_of_study, - t2.years AS years, - t2.degrees AS degrees, - t2.earliest_degrees AS earliest_degrees, - t2.latest_degrees AS latest_degrees, - t2.latest_degrees - t2.earliest_degrees AS diff - FROM t2 -), t4 AS ( - SELECT - t3.field_of_study AS field_of_study, - ARBITRARY(t3.diff) AS diff - FROM t3 - GROUP BY - 1 -), anon_1 AS ( +SELECT + "t10"."field_of_study", + "t10"."diff" +FROM ( SELECT - t4.field_of_study AS field_of_study, - t4.diff AS diff - FROM t4 + "t5"."field_of_study", + "t5"."diff" + FROM ( + SELECT + "t4"."field_of_study", + ARBITRARY("t4"."diff") AS "diff" + FROM ( + SELECT + "t3"."field_of_study", + "t3"."years", + "t3"."degrees", + "t3"."earliest_degrees", + "t3"."latest_degrees", + "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" + FROM ( + SELECT + "t2"."field_of_study", + "t2"."years", + "t2"."degrees", + FIRST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", + LAST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" + FROM ( + SELECT + "t1"."field_of_study", + "t1"."__pivoted__"."years" AS "years", + "t1"."__pivoted__"."degrees" AS "degrees" + FROM ( + SELECT + "t0"."field_of_study", + IF(_u.pos = _u_2.pos_2, _u_2."__pivoted__") AS "__pivoted__" + FROM "humanities" AS "t0" + CROSS JOIN UNNEST(SEQUENCE( + 1, + GREATEST( + CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") 
AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] + ) + ) + )) AS _u(pos) + CROSS JOIN UNNEST(ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))]) WITH ORDINALITY AS _u_2("__pivoted__", pos_2) + WHERE + _u.pos = _u_2.pos_2 + OR ( + _u.pos > CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), 
CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] + ) + AND _u_2.pos_2 = CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] + ) + ) + ) AS "t1" + ) AS "t2" + ) AS "t3" + ) AS "t4" + GROUP BY + 1 + ) AS "t5" ORDER BY - t4.diff DESC + "t5"."diff" DESC LIMIT 10 -), t5 AS ( + UNION ALL SELECT - t4.field_of_study AS field_of_study, - t4.diff AS diff - FROM t4 + "t5"."field_of_study", + "t5"."diff" + FROM ( + SELECT + "t4"."field_of_study", + ARBITRARY("t4"."diff") AS "diff" + FROM ( + SELECT + "t3"."field_of_study", + "t3"."years", + "t3"."degrees", + "t3"."earliest_degrees", + "t3"."latest_degrees", + "t3"."latest_degrees" - "t3"."earliest_degrees" AS "diff" + FROM ( + SELECT + "t2"."field_of_study", + "t2"."years", + "t2"."degrees", + FIRST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "earliest_degrees", + LAST_VALUE("t2"."degrees") OVER (PARTITION BY "t2"."field_of_study" ORDER BY "t2"."years" ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "latest_degrees" + FROM ( + SELECT + "t1"."field_of_study", + "t1"."__pivoted__"."years" AS "years", + "t1"."__pivoted__"."degrees" AS "degrees" + FROM ( + SELECT + "t0"."field_of_study", + IF(_u.pos = _u_2.pos_2, _u_2."__pivoted__") AS "__pivoted__" + FROM "humanities" AS "t0" + CROSS JOIN UNNEST(SEQUENCE( + 1, + GREATEST( + CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), 
CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] + ) + ) + )) AS _u(pos) + CROSS JOIN UNNEST(ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))]) WITH ORDINALITY AS _u_2("__pivoted__", pos_2) + WHERE + _u.pos = _u_2.pos_2 + OR ( + _u.pos > CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS 
ROW(years VARCHAR, degrees BIGINT))] + ) + AND _u_2.pos_2 = CARDINALITY( + ARRAY[CAST(ROW('1970-71', "t0"."1970-71") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1975-76', "t0"."1975-76") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1980-81', "t0"."1980-81") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1985-86', "t0"."1985-86") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1990-91', "t0"."1990-91") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('1995-96', "t0"."1995-96") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2000-01', "t0"."2000-01") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2005-06', "t0"."2005-06") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2010-11', "t0"."2010-11") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2011-12', "t0"."2011-12") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2012-13', "t0"."2012-13") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2013-14', "t0"."2013-14") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2014-15', "t0"."2014-15") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2015-16', "t0"."2015-16") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2016-17', "t0"."2016-17") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2017-18', "t0"."2017-18") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2018-19', "t0"."2018-19") AS ROW(years VARCHAR, degrees BIGINT)), CAST(ROW('2019-20', "t0"."2019-20") AS ROW(years VARCHAR, degrees BIGINT))] + ) + ) + ) AS "t1" + ) AS "t2" + ) AS "t3" + ) AS "t4" + GROUP BY + 1 + ) AS "t5" WHERE - t4.diff < 0 -), anon_3 AS ( - SELECT - t5.field_of_study AS field_of_study, - t5.diff AS diff - FROM t5 + "t5"."diff" < 0 ORDER BY - t5.diff ASC + "t5"."diff" ASC LIMIT 10 -) -SELECT - t6.field_of_study, - t6.diff -FROM ( - SELECT - anon_1.field_of_study AS field_of_study, - anon_1.diff AS diff - FROM anon_1 - UNION ALL - SELECT - anon_3.field_of_study AS field_of_study, - anon_3.diff AS diff - FROM anon_3 -) AS t6 \ No newline at end of file +) AS "t10" \ No newline at end of file diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 7cfedc8f5f53..ae49d79847d9 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -22,6 +22,7 @@ Py4JError, PySparkAnalysisException, SnowflakeProgrammingError, + TrinoUserError, ) from ibis.legacy.udf.vectorized import reduction @@ -1053,10 +1054,11 @@ def test_quantile( reason="Correlation with how='sample' is not supported.", ), pytest.mark.notyet( - ["trino", "postgres", "duckdb", "snowflake", "oracle"], + ["postgres", "duckdb", "snowflake", "oracle"], raises=ValueError, reason="XXXXSQLExprTranslator only implements population correlation coefficient", ), + pytest.mark.notyet(["trino"], raises=com.UnsupportedOperationError), ], ), param( @@ -1286,7 +1288,6 @@ def test_date_quantile(alltypes, func): "::", id="expr", marks=[ - pytest.mark.notyet(["trino"], raises=com.UnsupportedOperationError), pytest.mark.notyet( ["bigquery"], raises=GoogleBadRequest, @@ -1310,13 +1311,19 @@ def test_date_quantile(alltypes, func): param( lambda t: t.string_col.isin(["1", "7"]), lambda t: t.string_col.isin(["1", "7"]), - marks=pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), + marks=[ + pytest.mark.notyet(["trino"], raises=TrinoUserError), + pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), + ], id="is_in", ), param( lambda t: t.string_col.notin(["1", "7"]), lambda t: ~t.string_col.isin(["1", "7"]), - 
marks=pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), + marks=[ + pytest.mark.notyet(["trino"], raises=TrinoUserError), + pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError), + ], id="not_in", ), ], @@ -1597,8 +1604,9 @@ def test_grouped_case(backend, con): ) @pytest.mark.notyet(["impala", "flink"], raises=com.UnsupportedOperationError) @pytest.mark.notyet(["clickhouse"], raises=ClickHouseDatabaseError) -@pytest.mark.notyet(["druid", "trino"], raises=sa.exc.ProgrammingError) +@pytest.mark.notyet(["druid"], raises=sa.exc.ProgrammingError) @pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError) +@pytest.mark.notyet(["trino"], raises=TrinoUserError) @pytest.mark.notyet(["mysql"], raises=sa.exc.NotSupportedError) @pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError) @pytest.mark.notyet(["pyspark"], raises=PySparkAnalysisException) diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 244bf11eb92c..3629f0aef864 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -23,6 +23,7 @@ PolarsComputeError, Py4JJavaError, PySparkAnalysisException, + TrinoUserError, ) pytestmark = [ @@ -95,8 +96,7 @@ def test_array_concat(con): right = ibis.literal([2, 1]) expr = left + right result = con.execute(expr.name("tmp")) - expected = np.array([1, 2, 3, 2, 1]) - assert np.array_equal(result, expected) + assert sorted(result) == sorted([1, 2, 3, 2, 1]) # Issues #2370 @@ -113,10 +113,11 @@ def test_array_concat_variadic(con): # Issues #2370 @pytest.mark.notimpl(["flink"], raises=com.OperationNotDefinedError) @pytest.mark.notyet( - ["postgres", "trino"], + ["postgres"], raises=sa.exc.ProgrammingError, reason="backend can't infer the type of an empty array", ) +@pytest.mark.notyet(["trino"], raises=TrinoUserError) def test_array_concat_some_empty(con): left = ibis.literal([]) right = ibis.literal([2, 1]) @@ -401,17 +402,17 @@ def test_array_slice(backend, start, stop): param({"a": [[1, 2], [4]]}, {"a": [[2, 3], [5]]}, id="no_nulls"), ], ) -def test_array_map(backend, con, input, output): +def test_array_map(con, input, output): t = ibis.memtable(input, schema=ibis.schema(dict(a="!array"))) expected = pd.DataFrame(output) expr = t.select(a=t.a.map(lambda x: x + 1)) result = con.execute(expr) - backend.assert_frame_equal(result, expected) + assert frozenset(map(tuple, result["a"])) == frozenset(map(tuple, expected["a"])) expr = t.select(a=t.a.map(functools.partial(lambda x, y: x + y, y=1))) result = con.execute(expr) - backend.assert_frame_equal(result, expected) + assert frozenset(map(tuple, result["a"])) == frozenset(map(tuple, expected["a"])) @builtin_array @@ -445,23 +446,22 @@ def test_array_map(backend, con, input, output): param({"a": [[1, 2], [4]]}, {"a": [[2], [4]]}, id="no_nulls"), ], ) -def test_array_filter(backend, con, input, output): +def test_array_filter(con, input, output): t = ibis.memtable(input, schema=ibis.schema(dict(a="!array"))) expected = pd.DataFrame(output) expr = t.select(a=t.a.filter(lambda x: x > 1)) result = con.execute(expr) - backend.assert_frame_equal(result, expected) + assert frozenset(map(tuple, result["a"])) == frozenset(map(tuple, expected["a"])) expr = t.select(a=t.a.filter(functools.partial(lambda x, y: x > y, y=1))) result = con.execute(expr) - backend.assert_frame_equal(result, expected) + assert frozenset(map(tuple, result["a"])) == frozenset(map(tuple, expected["a"])) @builtin_array @pytest.mark.notimpl( - ["mssql", "polars", "postgres"], - 
raises=com.OperationNotDefinedError, + ["mssql", "polars", "postgres"], raises=com.OperationNotDefinedError ) @pytest.mark.notimpl(["dask"], raises=com.OperationNotDefinedError) @pytest.mark.never(["impala"], reason="array_types table isn't defined") @@ -475,31 +475,29 @@ def test_array_contains(backend, con): @builtin_array @pytest.mark.notimpl( - ["dask", "impala", "mssql", "polars"], - raises=com.OperationNotDefinedError, + ["dask", "impala", "mssql", "polars"], raises=com.OperationNotDefinedError ) @pytest.mark.broken( ["datafusion"], reason="internal error as of 34.0.0", raises=Exception ) def test_array_position(backend, con): - t = ibis.memtable({"a": [[1], [], [42, 42], []]}) - expr = t.a.index(42) + t = ibis.memtable({"a": [[1], [], [42, 42], []], "id": range(4)}) + expr = t.mutate(idx=t.a.index(42)).order_by("id") result = con.execute(expr) - expected = pd.Series([-1, -1, 0, -1], dtype="object") - backend.assert_series_equal(result, expected, check_names=False, check_dtype=False) + expected = pd.Series([-1, -1, 0, -1], name="idx") + backend.assert_series_equal(result.idx, expected, check_dtype=False) @builtin_array @pytest.mark.notimpl( - ["dask", "impala", "mssql", "polars"], - raises=com.OperationNotDefinedError, + ["dask", "impala", "mssql", "polars"], raises=com.OperationNotDefinedError ) -def test_array_remove(backend, con): +def test_array_remove(con): t = ibis.memtable({"a": [[3, 2], [], [42, 2], [2, 2], []]}) expr = t.a.remove(2) result = con.execute(expr) - expected = pd.Series([[3], [], [42], [], []], dtype="object") - backend.assert_series_equal(result, expected, check_names=False) + expected = frozenset(map(tuple, ([3], [], [42], [], []))) + assert frozenset(map(tuple, result.values)) == expected @builtin_array @@ -540,12 +538,12 @@ def test_array_remove(backend, con): ), ], ) -def test_array_unique(backend, con, input, expected): +def test_array_unique(con, input, expected): t = ibis.memtable(input) expr = t.a.unique() - result = con.execute(expr).map(set, na_action="ignore") - expected = pd.Series(expected, dtype="object") - backend.assert_series_equal(result, expected, check_names=False) + result = con.execute(expr).map(frozenset, na_action="ignore") + expected = pd.Series(expected, dtype="object").map(frozenset, na_action="ignore") + assert set(result) == set(expected) @builtin_array @@ -554,11 +552,11 @@ def test_array_unique(backend, con, input, expected): raises=com.OperationNotDefinedError, ) def test_array_sort(backend, con): - t = ibis.memtable({"a": [[3, 2], [], [42, 42], []]}) - expr = t.a.sort() + t = ibis.memtable({"a": [[3, 2], [], [42, 42], []], "id": range(4)}) + expr = t.mutate(a=t.a.sort()).order_by("id") result = con.execute(expr) expected = pd.Series([[2, 3], [], [42, 42], []], dtype="object") - backend.assert_series_equal(result, expected, check_names=False) + backend.assert_series_equal(result.a, expected, check_names=False) @builtin_array @@ -575,11 +573,10 @@ def test_array_union(con): t = ibis.memtable({"a": [[3, 2], [], []], "b": [[1, 3], [None], [5]]}) expr = t.a.union(t.b) result = con.execute(expr).map(set, na_action="ignore") - expected = pd.Series([{1, 2, 3}, {None}, {5}], dtype="object") - assert len(result) == len(expected) - for i, (lhs, rhs) in enumerate(zip(result, expected)): - assert lhs == rhs, f"row {i:d} differs" + # turn everything into frozensets for unordered comparison + expected = frozenset(((1, 2, 3), (None,), (5,))) + assert frozenset(map(tuple, result.values)) == expected @pytest.mark.notimpl( @@ -628,6 +625,9 @@ def 
test_array_intersect(con, data): ) @pytest.mark.notimpl(["postgres"], raises=sa.exc.ProgrammingError) @pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError) +@pytest.mark.broken( + ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError +) def test_unnest_struct(con): data = {"value": [[{"a": 1}, {"a": 2}], [{"a": 3}, {"a": 4}]]} t = ibis.memtable(data, schema=ibis.schema({"value": "!array>"})) @@ -683,6 +683,9 @@ def test_zip(backend): reason="pyspark doesn't seem to support field selection on explode", raises=PySparkAnalysisException, ) +@pytest.mark.broken( + ["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError +) def test_array_of_struct_unnest(con): jobs = ibis.memtable( { @@ -767,12 +770,14 @@ def flatten_data(): @pytest.mark.notyet(["datafusion"], raises=com.OperationNotDefinedError) def test_array_flatten(backend, flatten_data, column, expected): data = flatten_data[column] - t = ibis.memtable( - {column: data["data"]}, schema=ibis.schema({column: data["type"]}) - ) + t = ibis.memtable({column: data["data"]}, schema={column: data["type"]}) expr = t[column].flatten() result = backend.connection.execute(expr) - backend.assert_series_equal(result, expected, check_names=False) + backend.assert_series_equal( + result.sort_values().reset_index(drop=True), + expected.sort_values().reset_index(drop=True), + check_names=False, + ) @pytest.mark.notyet( @@ -944,7 +949,7 @@ def swap(token): marks=[ pytest.mark.notyet( ["trino"], - raises=sa.exc.ProgrammingError, + raises=TrinoUserError, reason="trino doesn't support timestamp with time zone arguments to its sequence function", ), pytest.mark.notyet( diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index ad7678665678..904948e03051 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -84,7 +84,7 @@ def time_keyed_right(time_keyed_df2): ("forward", operator.le), ], ) -@pytest.mark.notimpl(["datafusion", "snowflake"]) +@pytest.mark.notimpl(["datafusion", "snowflake", "trino"]) def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op): on = op(time_left["time"], time_right["time"]) expr = time_left.asof_join(time_right, on=on, predicates="group") @@ -112,7 +112,7 @@ def test_asof_join(con, time_left, time_right, time_df1, time_df2, direction, op @pytest.mark.broken( ["clickhouse"], raises=AssertionError, reason="`time` is truncated to seconds" ) -@pytest.mark.notimpl(["datafusion", "snowflake"]) +@pytest.mark.notimpl(["datafusion", "snowflake", "trino"]) def test_keyed_asof_join_with_tolerance( con, time_keyed_left, diff --git a/ibis/backends/tests/test_binary.py b/ibis/backends/tests/test_binary.py index 310c80bcca00..e476ac1d388a 100644 --- a/ibis/backends/tests/test_binary.py +++ b/ibis/backends/tests/test_binary.py @@ -14,18 +14,13 @@ "duckdb": "BLOB", "snowflake": "BINARY", "sqlite": "blob", - "trino": "STRING", + "trino": "varbinary", "postgres": "bytea", "flink": "BINARY(1) NOT NULL", } -@pytest.mark.broken( - ["trino"], - "(builtins.AttributeError) 'bytes' object has no attribute 'encode'", - raises=sqlalchemy.exc.StatementError, -) -@pytest.mark.broken( +@pytest.mark.notimpl( ["clickhouse", "impala"], "Unsupported type: Binary(nullable=True)", raises=NotImplementedError, diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index 215868e8bc31..8732f8f29652 100644 --- a/ibis/backends/tests/test_client.py +++ 
b/ibis/backends/tests/test_client.py @@ -867,7 +867,7 @@ def test_in_memory_table(backend, con, arg, lambda_, expected, monkeypatch): monkeypatch.setattr(ibis.options, "default_backend", con) expr = lambda_(arg) - result = con.execute(expr) + result = con.execute(expr.order_by(expr.columns[0])) backend.assert_frame_equal(result, expected) @@ -875,7 +875,7 @@ def test_filter_memory_table(backend, con, monkeypatch): monkeypatch.setattr(ibis.options, "default_backend", con) t = ibis.memtable([(1, 2), (3, 4), (5, 6)], columns=["x", "y"]) - expr = t.filter(t.x > 1) + expr = t.filter(t.x > 1).order_by("x") expected = pd.DataFrame({"x": [3, 5], "y": [4, 6]}) result = con.execute(expr) backend.assert_frame_equal(result, expected) diff --git a/ibis/backends/tests/test_export.py b/ibis/backends/tests/test_export.py index a4912904c825..c809c3b9f030 100644 --- a/ibis/backends/tests/test_export.py +++ b/ibis/backends/tests/test_export.py @@ -16,6 +16,7 @@ PyDeltaTableError, PySparkAnalysisException, SnowflakeProgrammingError, + TrinoUserError, ) from ibis.formats.pyarrow import PyArrowType @@ -354,10 +355,9 @@ def test_table_to_csv_writer_kwargs(delimiter, tmp_path, awards_players): marks=[ pytest.mark.notyet(["impala"], reason="precision not supported"), pytest.mark.notyet(["duckdb"], reason="precision is out of range"), - pytest.mark.notyet( - ["druid", "mssql", "trino"], raises=sa.exc.ProgrammingError - ), + pytest.mark.notyet(["druid", "mssql"], raises=sa.exc.ProgrammingError), pytest.mark.notyet(["snowflake"], raises=SnowflakeProgrammingError), + pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.notyet(["oracle"], raises=sa.exc.DatabaseError), pytest.mark.notyet(["mysql"], raises=sa.exc.OperationalError), pytest.mark.notyet( diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index 4b92bfc0c3e6..c6fdbb308e06 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -3,6 +3,7 @@ import contextlib import datetime import decimal +from collections import Counter from operator import invert, methodcaller, neg import numpy as np @@ -23,6 +24,7 @@ GoogleBadRequest, ImpalaHiveServer2Error, SnowflakeProgrammingError, + TrinoUserError, ) from ibis.common.annotations import ValidationError @@ -925,10 +927,9 @@ def test_literal_na(con, dtype): @pytest.mark.notimpl(["exasol"]) def test_memtable_bool_column(backend, con): - t = ibis.memtable({"a": [True, False, True]}) - backend.assert_series_equal( - con.execute(t.a), pd.Series([True, False, True], name="a") - ) + data = [True, False, True] + t = ibis.memtable({"a": data}) + assert Counter(con.execute(t.a)) == Counter(data) @pytest.mark.broken( @@ -953,7 +954,7 @@ def test_memtable_construct(backend, con, monkeypatch): ) t = ibis.memtable(pa_t) backend.assert_frame_equal( - t.execute().fillna(pd.NA), pa_t.to_pandas().fillna(pd.NA) + t.order_by("a").execute().fillna(pd.NA), pa_t.to_pandas().fillna(pd.NA) ) @@ -1025,6 +1026,11 @@ def query(t, group_cols): reason="backend doesn't support arrays and we don't implement pivot_longer with unions yet", raises=com.OperationNotDefinedError, ) +@pytest.mark.broken( + ["trino"], + reason="invalid code generated for unnesting a struct", + raises=TrinoUserError, +) def test_pivot_longer(backend): diamonds = backend.diamonds df = diamonds.execute() @@ -1311,11 +1317,7 @@ def test_hash_consistent(backend, alltypes): pytest.mark.never( ["clickhouse", "flink"], reason="casts to 1672531200" ), - pytest.mark.notyet( - ["trino"], - 
raises=sa.exc.ProgrammingError, - reason="raises TrinoUserError", - ), + pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.broken(["datafusion"], reason="casts to the wrong value"), pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"), ], @@ -1326,11 +1328,7 @@ def test_hash_consistent(backend, alltypes): 1672531200, marks=[ pytest.mark.notyet(["duckdb"], reason="casts to None"), - pytest.mark.notyet( - ["trino"], - raises=sa.exc.ProgrammingError, - reason="raises TrinoUserError", - ), + pytest.mark.notyet(["trino"], raises=TrinoUserError), pytest.mark.broken(["polars"], reason="casts to 1672531200000000000"), pytest.mark.broken(["datafusion"], reason="casts to 1672531200000000"), ], @@ -1367,7 +1365,7 @@ def test_try_cast_table(backend, con): t = ibis.memtable(df) backend.assert_frame_equal( - con.execute(t.try_cast({"a": "int", "b": "float"})), expected + con.execute(t.try_cast({"a": "int", "b": "float"}).order_by("a")), expected ) @@ -1402,11 +1400,7 @@ def test_try_cast_table(backend, con): ["clickhouse", "polars", "flink"], reason="casts this to to a number", ), - pytest.mark.notyet( - ["trino"], - raises=sa.exc.ProgrammingError, - reason="raises TrinoUserError", - ), + pytest.mark.notyet(["trino"], raises=TrinoUserError), ], ), ], @@ -1552,7 +1546,7 @@ def test_static_table_slice(backend, slc, expected_count_fn): ids=str, ) @pytest.mark.notyet( - ["mysql", "trino"], + ["mysql"], raises=sa.exc.ProgrammingError, reason="backend doesn't support dynamic limit/offset", ) @@ -1561,6 +1555,11 @@ def test_static_table_slice(backend, slc, expected_count_fn): raises=SnowflakeProgrammingError, reason="backend doesn't support dynamic limit/offset", ) +@pytest.mark.notyet( + ["trino"], + raises=TrinoUserError, + reason="backend doesn't support dynamic limit/offset", +) @pytest.mark.notimpl( ["mssql"], raises=sa.exc.CompileError, @@ -1605,7 +1604,7 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): @pytest.mark.notyet( - ["mysql", "trino"], + ["mysql"], raises=sa.exc.ProgrammingError, reason="backend doesn't support dynamic limit/offset", ) @@ -1615,9 +1614,11 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): reason="backend doesn't support dynamic limit/offset", ) @pytest.mark.notimpl( - ["exasol"], - raises=sa.exc.CompileError, + ["trino"], + raises=TrinoUserError, + reason="backend doesn't support dynamic limit/offset", ) +@pytest.mark.notimpl(["exasol"], raises=sa.exc.CompileError) @pytest.mark.notyet( ["clickhouse"], raises=ClickHouseDatabaseError, diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index 857918c865b5..70e95b122cce 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -24,6 +24,7 @@ ImpalaHiveServer2Error, Py4JError, SnowflakeProgrammingError, + TrinoUserError, ) from ibis.expr import datatypes as dt from ibis.tests.util import assert_equal @@ -160,7 +161,7 @@ "impala": "DECIMAL(2,1)", "snowflake": "INTEGER", "sqlite": "real", - "trino": "double", + "trino": "real", "duckdb": "FLOAT", "postgres": "numeric", "flink": "FLOAT NOT NULL", @@ -191,7 +192,7 @@ "impala": "DECIMAL(2,1)", "snowflake": "INTEGER", "sqlite": "real", - "trino": "double", + "trino": "real", "duckdb": "FLOAT", "postgres": "numeric", "flink": "FLOAT NOT NULL", @@ -246,7 +247,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "bigquery": decimal.Decimal("1.1"), "snowflake": decimal.Decimal("1.1"), "sqlite": 1.1, - "trino": 1.1, + "trino": 
decimal.Decimal("1.1"), "dask": decimal.Decimal("1.1"), "duckdb": decimal.Decimal("1.1"), "postgres": 1.1, @@ -263,7 +264,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "bigquery": "NUMERIC", "snowflake": "DECIMAL", "sqlite": "real", - "trino": "decimal(2,1)", + "trino": "decimal(18,3)", "duckdb": "DECIMAL(18,3)", "postgres": "numeric", "flink": "DECIMAL(38, 18) NOT NULL", @@ -298,7 +299,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "bigquery": decimal.Decimal("1.1"), "snowflake": decimal.Decimal("1.1"), "sqlite": 1.1, - "trino": 1.1, + "trino": decimal.Decimal("1.1"), "duckdb": decimal.Decimal("1.100000000"), "postgres": 1.1, "pandas": decimal.Decimal("1.1"), @@ -317,7 +318,7 @@ def test_numeric_literal(con, backend, expr, expected_types): "clickhouse": "Decimal(38, 9)", "snowflake": "DECIMAL", "sqlite": "real", - "trino": "decimal(2,1)", + "trino": "decimal(38,9)", "duckdb": "DECIMAL(38,9)", "postgres": "numeric", "flink": "DECIMAL(38, 9) NOT NULL", @@ -346,7 +347,6 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": decimal.Decimal("1.1"), "sqlite": 1.1, - "trino": 1.1, "dask": decimal.Decimal("1.1"), "postgres": 1.1, "pandas": decimal.Decimal("1.1"), @@ -381,7 +381,12 @@ def test_numeric_literal(con, backend, expr, expected_types): raises=ImpalaHiveServer2Error, ), pytest.mark.broken( - ["duckdb"], "Unsupported precision.", raises=DuckDBParserException + ["duckdb"], + reason="Unsupported precision.", + raises=DuckDBParserException, + ), + pytest.mark.broken( + ["trino"], reason="Unsupported precision.", raises=TrinoUserError ), pytest.mark.notyet(["datafusion"], raises=Exception), pytest.mark.notyet( @@ -408,7 +413,6 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": "FLOAT64", "sqlite": "real", - "trino": "decimal(2,1)", "postgres": "numeric", "impala": "DOUBLE", "duckdb": "FLOAT", @@ -459,6 +463,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "(oracledb.exceptions.DatabaseError) DPY-4004: invalid number", raises=sa.exc.DatabaseError, ), + pytest.mark.notyet( + ["trino"], + raises=TrinoUserError, + reason="can't cast infinity to decimal", + ), pytest.mark.notyet( ["flink"], "Infinity is not supported in Flink SQL", @@ -488,7 +497,6 @@ def test_numeric_literal(con, backend, expr, expected_types): { "bigquery": "FLOAT64", "sqlite": "real", - "trino": "decimal(2,1)", "postgres": "numeric", "impala": "DOUBLE", "duckdb": "FLOAT", @@ -549,6 +557,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "infinity is not allowed as a decimal value", raises=SnowflakeProgrammingError, ), + pytest.mark.notyet( + ["trino"], + raises=TrinoUserError, + reason="can't cast infinity to decimal", + ), ], id="decimal-infinity-", ), @@ -570,7 +583,6 @@ def test_numeric_literal(con, backend, expr, expected_types): "bigquery": "FLOAT64", "snowflake": "DOUBLE", "sqlite": "null", - "trino": "decimal(2,1)", "postgres": "numeric", "impala": "DOUBLE", "duckdb": "FLOAT", @@ -639,6 +651,11 @@ def test_numeric_literal(con, backend, expr, expected_types): "NaN is not allowed as a decimal value", raises=SnowflakeProgrammingError, ), + pytest.mark.notyet( + ["trino"], + raises=TrinoUserError, + reason="can't cast nan to decimal", + ), ], id="decimal-NaN", ), @@ -1444,6 +1461,7 @@ def test_divide_by_zero(backend, alltypes, df, column, denominator): "polars", "flink", "snowflake", + "trino", ], reason="Not SQLAlchemy backends", ) diff --git a/ibis/backends/tests/test_struct.py 
b/ibis/backends/tests/test_struct.py index 3eaf9cae1549..d368c6b16305 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -24,26 +24,25 @@ [ param( "a", - [1.0, 2.0, 3.0, np.nan, 2.0, np.nan, 3.0], + [1.0, 2.0, 2.0, 3.0, 3.0, np.nan, np.nan], id="a", marks=pytest.mark.notimpl(["snowflake"]), ), param( - "b", ["banana", "apple", "orange", "banana", None, None, "orange"], id="b" + "b", ["apple", "banana", "banana", "orange", "orange", None, None], id="b" ), param( "c", - [2, 3, 4, 2, 3, np.nan, np.nan], + [2, 2, 3, 3, 4, np.nan, np.nan], id="c", marks=pytest.mark.notimpl(["snowflake"]), ), ], ) def test_single_field(struct, field, expected): - expr = struct.abc[field] + expr = struct.select(field=lambda t: t.abc[field]).order_by("field") result = expr.execute() - equal_nan = expr.type().is_numeric() - assert np.array_equal(result, expected, equal_nan=equal_nan) + tm.assert_series_equal(result.field, pd.Series(expected, name="field")) @pytest.mark.notimpl(["dask"]) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 50e5aff7a216..0f075ae36d09 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -29,6 +29,7 @@ Py4JJavaError, PySparkIllegalArgumentException, SnowflakeProgrammingError, + TrinoUserError, ) from ibis.common.annotations import ValidationError @@ -1815,7 +1816,7 @@ def test_integer_to_timestamp(backend, con, unit): pytest.mark.never( ["trino"], reason="datetime formatting style not supported", - raises=sa.exc.ProgrammingError, + raises=TrinoUserError, ), pytest.mark.never( ["polars"], diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index afc0a04a9732..377bdafe04a9 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -778,6 +778,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): ["trino"], reason="this isn't actually broken: the backend result is equal up to ordering", raises=AssertionError, + strict=False, # sometimes it passes ), pytest.mark.broken(["oracle"], raises=AssertionError), pytest.mark.notimpl( @@ -818,6 +819,7 @@ def test_simple_ungrouped_window_with_scalar_order_by(alltypes): "result is equal up to ordering" ), raises=AssertionError, + strict=False, # sometimes it passes ), pytest.mark.broken(["oracle"], raises=AssertionError), pytest.mark.notimpl( diff --git a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/trino/h01.sql b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/trino/h01.sql index 398fc8d0a84c..5f0048dce3fb 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/trino/h01.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h01/test_tpc_h01/trino/h01.sql @@ -1,39 +1,62 @@ SELECT - t0.l_returnflag, - t0.l_linestatus, - t0.sum_qty, - t0.sum_base_price, - t0.sum_disc_price, - t0.sum_charge, - t0.avg_qty, - t0.avg_price, - t0.avg_disc, - t0.count_order + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."sum_qty", + "t2"."sum_base_price", + "t2"."sum_disc_price", + "t2"."sum_charge", + "t2"."avg_qty", + "t2"."avg_price", + "t2"."avg_disc", + "t2"."count_order" FROM ( SELECT - t1.l_returnflag AS l_returnflag, - t1.l_linestatus AS l_linestatus, - SUM(t1.l_quantity) AS sum_qty, - SUM(t1.l_extendedprice) AS sum_base_price, - SUM(t1.l_extendedprice * ( - 1 - t1.l_discount - )) AS sum_disc_price, - SUM(t1.l_extendedprice * ( - 1 - t1.l_discount - ) * ( - t1.l_tax + 1 - )) AS sum_charge, - AVG(t1.l_quantity) AS avg_qty, - 
AVG(t1.l_extendedprice) AS avg_price, - AVG(t1.l_discount) AS avg_disc, - COUNT(*) AS count_order - FROM hive.ibis_sf1.lineitem AS t1 - WHERE - t1.l_shipdate <= FROM_ISO8601_DATE('1998-09-02') + "t1"."l_returnflag", + "t1"."l_linestatus", + SUM("t1"."l_quantity") AS "sum_qty", + SUM("t1"."l_extendedprice") AS "sum_base_price", + SUM("t1"."l_extendedprice" * ( + 1 - "t1"."l_discount" + )) AS "sum_disc_price", + SUM( + ( + "t1"."l_extendedprice" * ( + 1 - "t1"."l_discount" + ) + ) * ( + "t1"."l_tax" + 1 + ) + ) AS "sum_charge", + AVG("t1"."l_quantity") AS "avg_qty", + AVG("t1"."l_extendedprice") AS "avg_price", + AVG("t1"."l_discount") AS "avg_disc", + COUNT(*) AS "count_order" + FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + WHERE + "t0"."l_shipdate" <= FROM_ISO8601_DATE('1998-09-02') + ) AS "t1" GROUP BY 1, 2 -) AS t0 +) AS "t2" ORDER BY - t0.l_returnflag ASC, - t0.l_linestatus ASC \ No newline at end of file + "t2"."l_returnflag" ASC, + "t2"."l_linestatus" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql index af4e746c080e..d76a0c18cfed 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h02/test_tpc_h02/trino/h02.sql @@ -1,84 +1,190 @@ -WITH t0 AS ( - SELECT - t2.p_partkey AS p_partkey, - t2.p_name AS p_name, - t2.p_mfgr AS p_mfgr, - t2.p_brand AS p_brand, - t2.p_type AS p_type, - t2.p_size AS p_size, - t2.p_container AS p_container, - t2.p_retailprice AS p_retailprice, - t2.p_comment AS p_comment, - t3.ps_partkey AS ps_partkey, - t3.ps_suppkey AS ps_suppkey, - t3.ps_availqty AS ps_availqty, - t3.ps_supplycost AS ps_supplycost, - t3.ps_comment AS ps_comment, - t4.s_suppkey AS s_suppkey, - t4.s_name AS s_name, - t4.s_address AS s_address, - t4.s_nationkey AS s_nationkey, - t4.s_phone AS s_phone, - t4.s_acctbal AS s_acctbal, - t4.s_comment AS s_comment, - t5.n_nationkey AS n_nationkey, - t5.n_name AS n_name, - t5.n_regionkey AS n_regionkey, - t5.n_comment AS n_comment, - t6.r_regionkey AS r_regionkey, - t6.r_name AS r_name, - t6.r_comment AS r_comment - FROM hive.ibis_sf1.part AS t2 - JOIN hive.ibis_sf1.partsupp AS t3 - ON t2.p_partkey = t3.ps_partkey - JOIN hive.ibis_sf1.supplier AS t4 - ON t4.s_suppkey = t3.ps_suppkey - JOIN hive.ibis_sf1.nation AS t5 - ON t4.s_nationkey = t5.n_nationkey - JOIN hive.ibis_sf1.region AS t6 - ON t5.n_regionkey = t6.r_regionkey - WHERE - t2.p_size = 15 - AND t2.p_type LIKE '%BRASS' - AND t6.r_name = 'EUROPE' - AND t3.ps_supplycost = ( - SELECT - MIN(t3.ps_supplycost) AS "Min(ps_supplycost)" - FROM hive.ibis_sf1.partsupp AS t3 - JOIN hive.ibis_sf1.supplier AS t4 - ON t4.s_suppkey = t3.ps_suppkey - JOIN hive.ibis_sf1.nation AS t5 - ON t4.s_nationkey = t5.n_nationkey - JOIN hive.ibis_sf1.region AS t6 - ON t5.n_regionkey = t6.r_regionkey - WHERE - t6.r_name = 'EUROPE' AND t2.p_partkey = t3.ps_partkey - ) -) SELECT - t1.s_acctbal, - t1.s_name, - 
t1.n_name, - t1.p_partkey, - t1.p_mfgr, - t1.s_address, - t1.s_phone, - t1.s_comment + "t26"."s_acctbal", + "t26"."s_name", + "t26"."n_name", + "t26"."p_partkey", + "t26"."p_mfgr", + "t26"."s_address", + "t26"."s_phone", + "t26"."s_comment" FROM ( SELECT - t0.s_acctbal AS s_acctbal, - t0.s_name AS s_name, - t0.n_name AS n_name, - t0.p_partkey AS p_partkey, - t0.p_mfgr AS p_mfgr, - t0.s_address AS s_address, - t0.s_phone AS s_phone, - t0.s_comment AS s_comment - FROM t0 -) AS t1 + "t14"."p_partkey", + "t14"."p_name", + "t14"."p_mfgr", + "t14"."p_brand", + "t14"."p_type", + "t14"."p_size", + "t14"."p_container", + "t14"."p_retailprice", + "t14"."p_comment", + "t15"."ps_partkey", + "t15"."ps_suppkey", + "t15"."ps_availqty", + "t15"."ps_supplycost", + "t15"."ps_comment", + "t17"."s_suppkey", + "t17"."s_name", + "t17"."s_address", + "t17"."s_nationkey", + "t17"."s_phone", + "t17"."s_acctbal", + "t17"."s_comment", + "t10"."n_nationkey", + "t10"."n_name", + "t10"."n_regionkey", + "t10"."n_comment", + "t12"."r_regionkey", + "t12"."r_name", + "t12"."r_comment" + FROM ( + SELECT + "t0"."p_partkey", + "t0"."p_name", + "t0"."p_mfgr", + "t0"."p_brand", + "t0"."p_type", + "t0"."p_size", + "t0"."p_container", + CAST("t0"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", + "t0"."p_comment" + FROM "part" AS "t0" + ) AS "t14" + INNER JOIN ( + SELECT + "t1"."ps_partkey", + "t1"."ps_suppkey", + "t1"."ps_availqty", + CAST("t1"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t1"."ps_comment" + FROM "partsupp" AS "t1" + ) AS "t15" + ON "t14"."p_partkey" = "t15"."ps_partkey" + INNER JOIN ( + SELECT + "t2"."s_suppkey", + "t2"."s_name", + "t2"."s_address", + "t2"."s_nationkey", + "t2"."s_phone", + CAST("t2"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t2"."s_comment" + FROM "supplier" AS "t2" + ) AS "t17" + ON "t17"."s_suppkey" = "t15"."ps_suppkey" + INNER JOIN ( + SELECT + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" + FROM "nation" AS "t3" + ) AS "t10" + ON "t17"."s_nationkey" = "t10"."n_nationkey" + INNER JOIN ( + SELECT + "t4"."r_regionkey", + "t4"."r_name", + "t4"."r_comment" + FROM "region" AS "t4" + ) AS "t12" + ON "t10"."n_regionkey" = "t12"."r_regionkey" +) AS "t26" +WHERE + "t26"."p_size" = 15 + AND "t26"."p_type" LIKE '%BRASS' + AND "t26"."r_name" = 'EUROPE' + AND "t26"."ps_supplycost" = ( + SELECT + MIN("t28"."ps_supplycost") AS "Min(ps_supplycost)" + FROM ( + SELECT + "t27"."ps_partkey", + "t27"."ps_suppkey", + "t27"."ps_availqty", + "t27"."ps_supplycost", + "t27"."ps_comment", + "t27"."s_suppkey", + "t27"."s_name", + "t27"."s_address", + "t27"."s_nationkey", + "t27"."s_phone", + "t27"."s_acctbal", + "t27"."s_comment", + "t27"."n_nationkey", + "t27"."n_name", + "t27"."n_regionkey", + "t27"."n_comment", + "t27"."r_regionkey", + "t27"."r_name", + "t27"."r_comment" + FROM ( + SELECT + "t16"."ps_partkey", + "t16"."ps_suppkey", + "t16"."ps_availqty", + "t16"."ps_supplycost", + "t16"."ps_comment", + "t18"."s_suppkey", + "t18"."s_name", + "t18"."s_address", + "t18"."s_nationkey", + "t18"."s_phone", + "t18"."s_acctbal", + "t18"."s_comment", + "t11"."n_nationkey", + "t11"."n_name", + "t11"."n_regionkey", + "t11"."n_comment", + "t13"."r_regionkey", + "t13"."r_name", + "t13"."r_comment" + FROM ( + SELECT + "t1"."ps_partkey", + "t1"."ps_suppkey", + "t1"."ps_availqty", + CAST("t1"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t1"."ps_comment" + FROM "partsupp" AS "t1" + ) AS "t16" + INNER JOIN ( + SELECT + "t2"."s_suppkey", + "t2"."s_name", + 
"t2"."s_address", + "t2"."s_nationkey", + "t2"."s_phone", + CAST("t2"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t2"."s_comment" + FROM "supplier" AS "t2" + ) AS "t18" + ON "t18"."s_suppkey" = "t16"."ps_suppkey" + INNER JOIN ( + SELECT + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" + FROM "nation" AS "t3" + ) AS "t11" + ON "t18"."s_nationkey" = "t11"."n_nationkey" + INNER JOIN ( + SELECT + "t4"."r_regionkey", + "t4"."r_name", + "t4"."r_comment" + FROM "region" AS "t4" + ) AS "t13" + ON "t11"."n_regionkey" = "t13"."r_regionkey" + ) AS "t27" + WHERE + "t27"."r_name" = 'EUROPE' AND "t26"."p_partkey" = "t27"."ps_partkey" + ) AS "t28" + ) ORDER BY - t1.s_acctbal DESC, - t1.n_name ASC, - t1.s_name ASC, - t1.p_partkey ASC + "t26"."s_acctbal" DESC, + "t26"."n_name" ASC, + "t26"."s_name" ASC, + "t26"."p_partkey" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql index c6d2d28f8db8..ca724d740022 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h03/test_tpc_h03/trino/h03.sql @@ -1,39 +1,145 @@ -WITH t0 AS ( +SELECT + "t13"."l_orderkey", + "t13"."revenue", + "t13"."o_orderdate", + "t13"."o_shippriority" +FROM ( SELECT - t4.l_orderkey AS l_orderkey, - t3.o_orderdate AS o_orderdate, - t3.o_shippriority AS o_shippriority, - SUM(t4.l_extendedprice * ( - 1 - t4.l_discount - )) AS revenue - FROM hive.ibis_sf1.customer AS t2 - JOIN hive.ibis_sf1.orders AS t3 - ON t2.c_custkey = t3.o_custkey - JOIN hive.ibis_sf1.lineitem AS t4 - ON t4.l_orderkey = t3.o_orderkey - WHERE - t2.c_mktsegment = 'BUILDING' - AND t3.o_orderdate < FROM_ISO8601_DATE('1995-03-15') - AND t4.l_shipdate > FROM_ISO8601_DATE('1995-03-15') + "t12"."l_orderkey", + "t12"."o_orderdate", + "t12"."o_shippriority", + SUM("t12"."l_extendedprice" * ( + 1 - "t12"."l_discount" + )) AS "revenue" + FROM ( + SELECT + "t11"."c_custkey", + "t11"."c_name", + "t11"."c_address", + "t11"."c_nationkey", + "t11"."c_phone", + "t11"."c_acctbal", + "t11"."c_mktsegment", + "t11"."c_comment", + "t11"."o_orderkey", + "t11"."o_custkey", + "t11"."o_orderstatus", + "t11"."o_totalprice", + "t11"."o_orderdate", + "t11"."o_orderpriority", + "t11"."o_clerk", + "t11"."o_shippriority", + "t11"."o_comment", + "t11"."l_orderkey", + "t11"."l_partkey", + "t11"."l_suppkey", + "t11"."l_linenumber", + "t11"."l_quantity", + "t11"."l_extendedprice", + "t11"."l_discount", + "t11"."l_tax", + "t11"."l_returnflag", + "t11"."l_linestatus", + "t11"."l_shipdate", + "t11"."l_commitdate", + "t11"."l_receiptdate", + "t11"."l_shipinstruct", + "t11"."l_shipmode", + "t11"."l_comment" + FROM ( + SELECT + "t6"."c_custkey", + "t6"."c_name", + "t6"."c_address", + "t6"."c_nationkey", + "t6"."c_phone", + "t6"."c_acctbal", + "t6"."c_mktsegment", + "t6"."c_comment", + "t7"."o_orderkey", + "t7"."o_custkey", + "t7"."o_orderstatus", + "t7"."o_totalprice", + "t7"."o_orderdate", + "t7"."o_orderpriority", + "t7"."o_clerk", + "t7"."o_shippriority", + "t7"."o_comment", + "t8"."l_orderkey", + "t8"."l_partkey", + "t8"."l_suppkey", + "t8"."l_linenumber", + "t8"."l_quantity", + "t8"."l_extendedprice", + "t8"."l_discount", + "t8"."l_tax", + "t8"."l_returnflag", + "t8"."l_linestatus", + "t8"."l_shipdate", + "t8"."l_commitdate", + "t8"."l_receiptdate", + "t8"."l_shipinstruct", + "t8"."l_shipmode", + "t8"."l_comment" + FROM ( + SELECT + "t0"."c_custkey", + 
"t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + ) AS "t6" + INNER JOIN ( + SELECT + "t1"."o_orderkey", + "t1"."o_custkey", + "t1"."o_orderstatus", + CAST("t1"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t1"."o_orderdate", + "t1"."o_orderpriority", + "t1"."o_clerk", + "t1"."o_shippriority", + "t1"."o_comment" + FROM "orders" AS "t1" + ) AS "t7" + ON "t6"."c_custkey" = "t7"."o_custkey" + INNER JOIN ( + SELECT + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t2"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment" + FROM "lineitem" AS "t2" + ) AS "t8" + ON "t8"."l_orderkey" = "t7"."o_orderkey" + ) AS "t11" + WHERE + "t11"."c_mktsegment" = 'BUILDING' + AND "t11"."o_orderdate" < FROM_ISO8601_DATE('1995-03-15') + AND "t11"."l_shipdate" > FROM_ISO8601_DATE('1995-03-15') + ) AS "t12" GROUP BY 1, 2, 3 -) -SELECT - t1.l_orderkey, - t1.revenue, - t1.o_orderdate, - t1.o_shippriority -FROM ( - SELECT - t0.l_orderkey AS l_orderkey, - t0.revenue AS revenue, - t0.o_orderdate AS o_orderdate, - t0.o_shippriority AS o_shippriority - FROM t0 -) AS t1 +) AS "t13" ORDER BY - t1.revenue DESC, - t1.o_orderdate ASC + "t13"."revenue" DESC, + "t13"."o_orderdate" ASC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/trino/h04.sql b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/trino/h04.sql index 745fe02cef56..10bf14955d70 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/trino/h04.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h04/test_tpc_h04/trino/h04.sql @@ -1,20 +1,52 @@ SELECT - t0.o_orderpriority, - COUNT(*) AS order_count -FROM hive.ibis_sf1.orders AS t0 -WHERE - ( - EXISTS( + "t5"."o_orderpriority", + "t5"."order_count" +FROM ( + SELECT + "t4"."o_orderpriority", + COUNT(*) AS "order_count" + FROM ( + SELECT + "t2"."o_orderkey", + "t2"."o_custkey", + "t2"."o_orderstatus", + "t2"."o_totalprice", + "t2"."o_orderdate", + "t2"."o_orderpriority", + "t2"."o_clerk", + "t2"."o_shippriority", + "t2"."o_comment" + FROM ( SELECT - 1 AS anon_1 - FROM hive.ibis_sf1.lineitem AS t1 - WHERE - t1.l_orderkey = t0.o_orderkey AND t1.l_commitdate < t1.l_receiptdate - ) - ) - AND t0.o_orderdate >= FROM_ISO8601_DATE('1993-07-01') - AND t0.o_orderdate < FROM_ISO8601_DATE('1993-10-01') -GROUP BY - 1 + "t0"."o_orderkey", + "t0"."o_custkey", + "t0"."o_orderstatus", + CAST("t0"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t0"."o_orderdate", + "t0"."o_orderpriority", + "t0"."o_clerk", + "t0"."o_shippriority", + "t0"."o_comment" + FROM "orders" AS "t0" + ) AS "t2" + WHERE + EXISTS( + SELECT + 1 AS "1" + FROM "lineitem" AS "t1" + WHERE + ( + "t1"."l_orderkey" = "t2"."o_orderkey" + ) + AND ( + "t1"."l_commitdate" < "t1"."l_receiptdate" + ) + ) + AND "t2"."o_orderdate" >= FROM_ISO8601_DATE('1993-07-01') + AND "t2"."o_orderdate" < FROM_ISO8601_DATE('1993-10-01') + ) AS "t4" + GROUP BY + 1 +) AS "t5" ORDER BY - t0.o_orderpriority ASC \ No newline at end of file + "t5"."o_orderpriority" 
ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql index 7f8b31a4a45c..c1bbad1c8935 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h05/test_tpc_h05/trino/h05.sql @@ -1,29 +1,195 @@ SELECT - t0.n_name, - t0.revenue + "t25"."n_name", + "t25"."revenue" FROM ( SELECT - t5.n_name AS n_name, - SUM(t3.l_extendedprice * ( - 1 - t3.l_discount - )) AS revenue - FROM hive.ibis_sf1.customer AS t1 - JOIN hive.ibis_sf1.orders AS t2 - ON t1.c_custkey = t2.o_custkey - JOIN hive.ibis_sf1.lineitem AS t3 - ON t3.l_orderkey = t2.o_orderkey - JOIN hive.ibis_sf1.supplier AS t4 - ON t3.l_suppkey = t4.s_suppkey - JOIN hive.ibis_sf1.nation AS t5 - ON t1.c_nationkey = t4.s_nationkey AND t4.s_nationkey = t5.n_nationkey - JOIN hive.ibis_sf1.region AS t6 - ON t5.n_regionkey = t6.r_regionkey - WHERE - t6.r_name = 'ASIA' - AND t2.o_orderdate >= FROM_ISO8601_DATE('1994-01-01') - AND t2.o_orderdate < FROM_ISO8601_DATE('1995-01-01') + "t24"."n_name", + SUM("t24"."l_extendedprice" * ( + 1 - "t24"."l_discount" + )) AS "revenue" + FROM ( + SELECT + "t23"."c_custkey", + "t23"."c_name", + "t23"."c_address", + "t23"."c_nationkey", + "t23"."c_phone", + "t23"."c_acctbal", + "t23"."c_mktsegment", + "t23"."c_comment", + "t23"."o_orderkey", + "t23"."o_custkey", + "t23"."o_orderstatus", + "t23"."o_totalprice", + "t23"."o_orderdate", + "t23"."o_orderpriority", + "t23"."o_clerk", + "t23"."o_shippriority", + "t23"."o_comment", + "t23"."l_orderkey", + "t23"."l_partkey", + "t23"."l_suppkey", + "t23"."l_linenumber", + "t23"."l_quantity", + "t23"."l_extendedprice", + "t23"."l_discount", + "t23"."l_tax", + "t23"."l_returnflag", + "t23"."l_linestatus", + "t23"."l_shipdate", + "t23"."l_commitdate", + "t23"."l_receiptdate", + "t23"."l_shipinstruct", + "t23"."l_shipmode", + "t23"."l_comment", + "t23"."s_suppkey", + "t23"."s_name", + "t23"."s_address", + "t23"."s_nationkey", + "t23"."s_phone", + "t23"."s_acctbal", + "t23"."s_comment", + "t23"."n_nationkey", + "t23"."n_name", + "t23"."n_regionkey", + "t23"."n_comment", + "t23"."r_regionkey", + "t23"."r_name", + "t23"."r_comment" + FROM ( + SELECT + "t14"."c_custkey", + "t14"."c_name", + "t14"."c_address", + "t14"."c_nationkey", + "t14"."c_phone", + "t14"."c_acctbal", + "t14"."c_mktsegment", + "t14"."c_comment", + "t15"."o_orderkey", + "t15"."o_custkey", + "t15"."o_orderstatus", + "t15"."o_totalprice", + "t15"."o_orderdate", + "t15"."o_orderpriority", + "t15"."o_clerk", + "t15"."o_shippriority", + "t15"."o_comment", + "t16"."l_orderkey", + "t16"."l_partkey", + "t16"."l_suppkey", + "t16"."l_linenumber", + "t16"."l_quantity", + "t16"."l_extendedprice", + "t16"."l_discount", + "t16"."l_tax", + "t16"."l_returnflag", + "t16"."l_linestatus", + "t16"."l_shipdate", + "t16"."l_commitdate", + "t16"."l_receiptdate", + "t16"."l_shipinstruct", + "t16"."l_shipmode", + "t16"."l_comment", + "t17"."s_suppkey", + "t17"."s_name", + "t17"."s_address", + "t17"."s_nationkey", + "t17"."s_phone", + "t17"."s_acctbal", + "t17"."s_comment", + "t12"."n_nationkey", + "t12"."n_name", + "t12"."n_regionkey", + "t12"."n_comment", + "t13"."r_regionkey", + "t13"."r_name", + "t13"."r_comment" + FROM ( + SELECT + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM 
"customer" AS "t0" + ) AS "t14" + INNER JOIN ( + SELECT + "t1"."o_orderkey", + "t1"."o_custkey", + "t1"."o_orderstatus", + CAST("t1"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t1"."o_orderdate", + "t1"."o_orderpriority", + "t1"."o_clerk", + "t1"."o_shippriority", + "t1"."o_comment" + FROM "orders" AS "t1" + ) AS "t15" + ON "t14"."c_custkey" = "t15"."o_custkey" + INNER JOIN ( + SELECT + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t2"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment" + FROM "lineitem" AS "t2" + ) AS "t16" + ON "t16"."l_orderkey" = "t15"."o_orderkey" + INNER JOIN ( + SELECT + "t3"."s_suppkey", + "t3"."s_name", + "t3"."s_address", + "t3"."s_nationkey", + "t3"."s_phone", + CAST("t3"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t3"."s_comment" + FROM "supplier" AS "t3" + ) AS "t17" + ON "t16"."l_suppkey" = "t17"."s_suppkey" + INNER JOIN ( + SELECT + "t4"."n_nationkey", + "t4"."n_name", + "t4"."n_regionkey", + "t4"."n_comment" + FROM "nation" AS "t4" + ) AS "t12" + ON "t14"."c_nationkey" = "t17"."s_nationkey" + AND "t17"."s_nationkey" = "t12"."n_nationkey" + INNER JOIN ( + SELECT + "t5"."r_regionkey", + "t5"."r_name", + "t5"."r_comment" + FROM "region" AS "t5" + ) AS "t13" + ON "t12"."n_regionkey" = "t13"."r_regionkey" + ) AS "t23" + WHERE + "t23"."r_name" = 'ASIA' + AND "t23"."o_orderdate" >= FROM_ISO8601_DATE('1994-01-01') + AND "t23"."o_orderdate" < FROM_ISO8601_DATE('1995-01-01') + ) AS "t24" GROUP BY 1 -) AS t0 +) AS "t25" ORDER BY - t0.revenue DESC \ No newline at end of file + "t25"."revenue" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/trino/h06.sql b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/trino/h06.sql index 9e96d3028389..1984dc9737ef 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/trino/h06.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h06/test_tpc_h06/trino/h06.sql @@ -1,8 +1,27 @@ SELECT - SUM(t0.l_extendedprice * t0.l_discount) AS revenue -FROM hive.ibis_sf1.lineitem AS t0 -WHERE - t0.l_shipdate >= FROM_ISO8601_DATE('1994-01-01') - AND t0.l_shipdate < FROM_ISO8601_DATE('1995-01-01') - AND t0.l_discount BETWEEN 0.05 AND 0.07 - AND t0.l_quantity < 24 \ No newline at end of file + SUM("t1"."l_extendedprice" * "t1"."l_discount") AS "revenue" +FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + WHERE + "t0"."l_shipdate" >= FROM_ISO8601_DATE('1994-01-01') + AND "t0"."l_shipdate" < FROM_ISO8601_DATE('1995-01-01') + AND CAST("t0"."l_discount" AS DECIMAL(15, 2)) BETWEEN CAST(0.05 AS DOUBLE) AND CAST(0.07 AS DOUBLE) + AND CAST("t0"."l_quantity" AS DECIMAL(15, 2)) < 24 +) 
AS "t1" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql index 01cd03444260..74c153ef4979 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h07/test_tpc_h07/trino/h07.sql @@ -1,51 +1,137 @@ -WITH t0 AS ( - SELECT - t6.n_name AS supp_nation, - t7.n_name AS cust_nation, - t3.l_shipdate AS l_shipdate, - t3.l_extendedprice AS l_extendedprice, - t3.l_discount AS l_discount, - CAST(EXTRACT(year FROM t3.l_shipdate) AS SMALLINT) AS l_year, - t3.l_extendedprice * ( - 1 - t3.l_discount - ) AS volume - FROM hive.ibis_sf1.supplier AS t2 - JOIN hive.ibis_sf1.lineitem AS t3 - ON t2.s_suppkey = t3.l_suppkey - JOIN hive.ibis_sf1.orders AS t4 - ON t4.o_orderkey = t3.l_orderkey - JOIN hive.ibis_sf1.customer AS t5 - ON t5.c_custkey = t4.o_custkey - JOIN hive.ibis_sf1.nation AS t6 - ON t2.s_nationkey = t6.n_nationkey - JOIN hive.ibis_sf1.nation AS t7 - ON t5.c_nationkey = t7.n_nationkey -) SELECT - t1.supp_nation, - t1.cust_nation, - t1.l_year, - t1.revenue + "t24"."supp_nation", + "t24"."cust_nation", + "t24"."l_year", + "t24"."revenue" FROM ( SELECT - t0.supp_nation AS supp_nation, - t0.cust_nation AS cust_nation, - t0.l_year AS l_year, - SUM(t0.volume) AS revenue - FROM t0 - WHERE - ( - t0.cust_nation = 'FRANCE' AND t0.supp_nation = 'GERMANY' - OR t0.cust_nation = 'GERMANY' - AND t0.supp_nation = 'FRANCE' - ) - AND t0.l_shipdate BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') + "t23"."supp_nation", + "t23"."cust_nation", + "t23"."l_year", + SUM("t23"."volume") AS "revenue" + FROM ( + SELECT + "t22"."supp_nation", + "t22"."cust_nation", + "t22"."l_shipdate", + "t22"."l_extendedprice", + "t22"."l_discount", + "t22"."l_year", + "t22"."volume" + FROM ( + SELECT + "t10"."n_name" AS "supp_nation", + "t16"."n_name" AS "cust_nation", + "t13"."l_shipdate", + "t13"."l_extendedprice", + "t13"."l_discount", + EXTRACT(year FROM "t13"."l_shipdate") AS "l_year", + "t13"."l_extendedprice" * ( + 1 - "t13"."l_discount" + ) AS "volume" + FROM ( + SELECT + "t0"."s_suppkey", + "t0"."s_name", + "t0"."s_address", + "t0"."s_nationkey", + "t0"."s_phone", + CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t0"."s_comment" + FROM "supplier" AS "t0" + ) AS "t12" + INNER JOIN ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + ) AS "t13" + ON "t12"."s_suppkey" = "t13"."l_suppkey" + INNER JOIN ( + SELECT + "t2"."o_orderkey", + "t2"."o_custkey", + "t2"."o_orderstatus", + CAST("t2"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t2"."o_orderdate", + "t2"."o_orderpriority", + "t2"."o_clerk", + "t2"."o_shippriority", + "t2"."o_comment" + FROM "orders" AS "t2" + ) AS "t14" + ON "t14"."o_orderkey" = "t13"."l_orderkey" + INNER JOIN ( + SELECT + "t3"."c_custkey", + "t3"."c_name", + "t3"."c_address", + "t3"."c_nationkey", + "t3"."c_phone", + CAST("t3"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + 
"t3"."c_mktsegment", + "t3"."c_comment" + FROM "customer" AS "t3" + ) AS "t15" + ON "t15"."c_custkey" = "t14"."o_custkey" + INNER JOIN ( + SELECT + "t4"."n_nationkey", + "t4"."n_name", + "t4"."n_regionkey", + "t4"."n_comment" + FROM "nation" AS "t4" + ) AS "t10" + ON "t12"."s_nationkey" = "t10"."n_nationkey" + INNER JOIN ( + SELECT + "t4"."n_nationkey", + "t4"."n_name", + "t4"."n_regionkey", + "t4"."n_comment" + FROM "nation" AS "t4" + ) AS "t16" + ON "t15"."c_nationkey" = "t16"."n_nationkey" + ) AS "t22" + WHERE + ( + ( + ( + "t22"."cust_nation" = 'FRANCE' + ) AND ( + "t22"."supp_nation" = 'GERMANY' + ) + ) + OR ( + ( + "t22"."cust_nation" = 'GERMANY' + ) AND ( + "t22"."supp_nation" = 'FRANCE' + ) + ) + ) + AND "t22"."l_shipdate" BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') + ) AS "t23" GROUP BY 1, 2, 3 -) AS t1 +) AS "t24" ORDER BY - t1.supp_nation ASC, - t1.cust_nation ASC, - t1.l_year ASC \ No newline at end of file + "t24"."supp_nation" ASC, + "t24"."cust_nation" ASC, + "t24"."l_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql index 7b8fa445e977..80eaa5b0522c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h08/test_tpc_h08/trino/h08.sql @@ -1,64 +1,136 @@ -WITH t0 AS ( - SELECT - CAST(EXTRACT(year FROM t7.o_orderdate) AS SMALLINT) AS o_year, - t5.l_extendedprice * ( - 1 - t5.l_discount - ) AS volume, - t11.n_name AS nation, - t10.r_name AS r_name, - t7.o_orderdate AS o_orderdate, - t4.p_type AS p_type - FROM hive.ibis_sf1.part AS t4 - JOIN hive.ibis_sf1.lineitem AS t5 - ON t4.p_partkey = t5.l_partkey - JOIN hive.ibis_sf1.supplier AS t6 - ON t6.s_suppkey = t5.l_suppkey - JOIN hive.ibis_sf1.orders AS t7 - ON t5.l_orderkey = t7.o_orderkey - JOIN hive.ibis_sf1.customer AS t8 - ON t7.o_custkey = t8.c_custkey - JOIN hive.ibis_sf1.nation AS t9 - ON t8.c_nationkey = t9.n_nationkey - JOIN hive.ibis_sf1.region AS t10 - ON t9.n_regionkey = t10.r_regionkey - JOIN hive.ibis_sf1.nation AS t11 - ON t6.s_nationkey = t11.n_nationkey -), t1 AS ( - SELECT - t0.o_year AS o_year, - t0.volume AS volume, - t0.nation AS nation, - t0.r_name AS r_name, - t0.o_orderdate AS o_orderdate, - t0.p_type AS p_type - FROM t0 - WHERE - t0.r_name = 'AMERICA' - AND t0.o_orderdate BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') - AND t0.p_type = 'ECONOMY ANODIZED STEEL' -), t2 AS ( - SELECT - t1.o_year AS o_year, - t1.volume AS volume, - t1.nation AS nation, - t1.r_name AS r_name, - t1.o_orderdate AS o_orderdate, - t1.p_type AS p_type, - CASE WHEN ( - t1.nation = 'BRAZIL' - ) THEN t1.volume ELSE 0 END AS nation_volume - FROM t1 -) SELECT - t3.o_year, - t3.mkt_share + "t32"."o_year", + "t32"."mkt_share" FROM ( SELECT - t2.o_year AS o_year, - SUM(t2.nation_volume) / SUM(t2.volume) AS mkt_share - FROM t2 + "t31"."o_year", + CAST(SUM("t31"."nation_volume") AS DOUBLE) / SUM("t31"."volume") AS "mkt_share" + FROM ( + SELECT + "t30"."o_year", + "t30"."volume", + "t30"."nation", + "t30"."r_name", + "t30"."o_orderdate", + "t30"."p_type", + CASE WHEN "t30"."nation" = 'BRAZIL' THEN "t30"."volume" ELSE 0 END AS "nation_volume" + FROM ( + SELECT + EXTRACT(year FROM "t20"."o_orderdate") AS "o_year", + "t18"."l_extendedprice" * ( + 1 - "t18"."l_discount" + ) AS "volume", + "t22"."n_name" AS "nation", + "t16"."r_name", + "t20"."o_orderdate", + "t17"."p_type" 
+ FROM ( + SELECT + "t0"."p_partkey", + "t0"."p_name", + "t0"."p_mfgr", + "t0"."p_brand", + "t0"."p_type", + "t0"."p_size", + "t0"."p_container", + CAST("t0"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", + "t0"."p_comment" + FROM "part" AS "t0" + ) AS "t17" + INNER JOIN ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + ) AS "t18" + ON "t17"."p_partkey" = "t18"."l_partkey" + INNER JOIN ( + SELECT + "t2"."s_suppkey", + "t2"."s_name", + "t2"."s_address", + "t2"."s_nationkey", + "t2"."s_phone", + CAST("t2"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t2"."s_comment" + FROM "supplier" AS "t2" + ) AS "t19" + ON "t19"."s_suppkey" = "t18"."l_suppkey" + INNER JOIN ( + SELECT + "t3"."o_orderkey", + "t3"."o_custkey", + "t3"."o_orderstatus", + CAST("t3"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t3"."o_orderdate", + "t3"."o_orderpriority", + "t3"."o_clerk", + "t3"."o_shippriority", + "t3"."o_comment" + FROM "orders" AS "t3" + ) AS "t20" + ON "t18"."l_orderkey" = "t20"."o_orderkey" + INNER JOIN ( + SELECT + "t4"."c_custkey", + "t4"."c_name", + "t4"."c_address", + "t4"."c_nationkey", + "t4"."c_phone", + CAST("t4"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t4"."c_mktsegment", + "t4"."c_comment" + FROM "customer" AS "t4" + ) AS "t21" + ON "t20"."o_custkey" = "t21"."c_custkey" + INNER JOIN ( + SELECT + "t5"."n_nationkey", + "t5"."n_name", + "t5"."n_regionkey", + "t5"."n_comment" + FROM "nation" AS "t5" + ) AS "t14" + ON "t21"."c_nationkey" = "t14"."n_nationkey" + INNER JOIN ( + SELECT + "t6"."r_regionkey", + "t6"."r_name", + "t6"."r_comment" + FROM "region" AS "t6" + ) AS "t16" + ON "t14"."n_regionkey" = "t16"."r_regionkey" + INNER JOIN ( + SELECT + "t5"."n_nationkey", + "t5"."n_name", + "t5"."n_regionkey", + "t5"."n_comment" + FROM "nation" AS "t5" + ) AS "t22" + ON "t19"."s_nationkey" = "t22"."n_nationkey" + ) AS "t30" + WHERE + "t30"."r_name" = 'AMERICA' + AND "t30"."o_orderdate" BETWEEN FROM_ISO8601_DATE('1995-01-01') AND FROM_ISO8601_DATE('1996-12-31') + AND "t30"."p_type" = 'ECONOMY ANODIZED STEEL' + ) AS "t31" GROUP BY 1 -) AS t3 +) AS "t32" ORDER BY - t3.o_year ASC \ No newline at end of file + "t32"."o_year" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql index 2656c917ba20..3e652f95bc61 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h09/test_tpc_h09/trino/h09.sql @@ -1,39 +1,117 @@ -WITH t0 AS ( - SELECT - t2.l_extendedprice * ( - 1 - t2.l_discount - ) - t4.ps_supplycost * t2.l_quantity AS amount, - CAST(EXTRACT(year FROM t6.o_orderdate) AS SMALLINT) AS o_year, - t7.n_name AS nation, - t5.p_name AS p_name - FROM hive.ibis_sf1.lineitem AS t2 - JOIN hive.ibis_sf1.supplier AS t3 - ON t3.s_suppkey = t2.l_suppkey - JOIN hive.ibis_sf1.partsupp AS t4 - ON t4.ps_suppkey = t2.l_suppkey AND t4.ps_partkey = t2.l_partkey - JOIN hive.ibis_sf1.part AS t5 - ON 
t5.p_partkey = t2.l_partkey - JOIN hive.ibis_sf1.orders AS t6 - ON t6.o_orderkey = t2.l_orderkey - JOIN hive.ibis_sf1.nation AS t7 - ON t3.s_nationkey = t7.n_nationkey - WHERE - t5.p_name LIKE '%green%' -) SELECT - t1.nation, - t1.o_year, - t1.sum_profit + "t25"."nation", + "t25"."o_year", + "t25"."sum_profit" FROM ( SELECT - t0.nation AS nation, - t0.o_year AS o_year, - SUM(t0.amount) AS sum_profit - FROM t0 + "t24"."nation", + "t24"."o_year", + SUM("t24"."amount") AS "sum_profit" + FROM ( + SELECT + "t23"."amount", + "t23"."o_year", + "t23"."nation", + "t23"."p_name" + FROM ( + SELECT + ( + "t13"."l_extendedprice" * ( + 1 - "t13"."l_discount" + ) + ) - ( + "t15"."ps_supplycost" * "t13"."l_quantity" + ) AS "amount", + EXTRACT(year FROM "t17"."o_orderdate") AS "o_year", + "t12"."n_name" AS "nation", + "t16"."p_name" + FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + ) AS "t13" + INNER JOIN ( + SELECT + "t1"."s_suppkey", + "t1"."s_name", + "t1"."s_address", + "t1"."s_nationkey", + "t1"."s_phone", + CAST("t1"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t1"."s_comment" + FROM "supplier" AS "t1" + ) AS "t14" + ON "t14"."s_suppkey" = "t13"."l_suppkey" + INNER JOIN ( + SELECT + "t2"."ps_partkey", + "t2"."ps_suppkey", + "t2"."ps_availqty", + CAST("t2"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t2"."ps_comment" + FROM "partsupp" AS "t2" + ) AS "t15" + ON "t15"."ps_suppkey" = "t13"."l_suppkey" AND "t15"."ps_partkey" = "t13"."l_partkey" + INNER JOIN ( + SELECT + "t3"."p_partkey", + "t3"."p_name", + "t3"."p_mfgr", + "t3"."p_brand", + "t3"."p_type", + "t3"."p_size", + "t3"."p_container", + CAST("t3"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", + "t3"."p_comment" + FROM "part" AS "t3" + ) AS "t16" + ON "t16"."p_partkey" = "t13"."l_partkey" + INNER JOIN ( + SELECT + "t4"."o_orderkey", + "t4"."o_custkey", + "t4"."o_orderstatus", + CAST("t4"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t4"."o_orderdate", + "t4"."o_orderpriority", + "t4"."o_clerk", + "t4"."o_shippriority", + "t4"."o_comment" + FROM "orders" AS "t4" + ) AS "t17" + ON "t17"."o_orderkey" = "t13"."l_orderkey" + INNER JOIN ( + SELECT + "t5"."n_nationkey", + "t5"."n_name", + "t5"."n_regionkey", + "t5"."n_comment" + FROM "nation" AS "t5" + ) AS "t12" + ON "t14"."s_nationkey" = "t12"."n_nationkey" + ) AS "t23" + WHERE + "t23"."p_name" LIKE '%green%' + ) AS "t24" GROUP BY 1, 2 -) AS t1 +) AS "t25" ORDER BY - t1.nation ASC, - t1.o_year DESC \ No newline at end of file + "t25"."nation" ASC, + "t25"."o_year" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql index a754dbf57432..d30ac72d2fd8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h10/test_tpc_h10/trino/h10.sql @@ -1,26 +1,164 @@ -WITH t0 AS ( +SELECT + "t17"."c_custkey", + "t17"."c_name", + "t17"."revenue", + "t17"."c_acctbal", + 
"t17"."n_name", + "t17"."c_address", + "t17"."c_phone", + "t17"."c_comment" +FROM ( SELECT - t2.c_custkey AS c_custkey, - t2.c_name AS c_name, - t2.c_acctbal AS c_acctbal, - t5.n_name AS n_name, - t2.c_address AS c_address, - t2.c_phone AS c_phone, - t2.c_comment AS c_comment, - SUM(t4.l_extendedprice * ( - 1 - t4.l_discount - )) AS revenue - FROM hive.ibis_sf1.customer AS t2 - JOIN hive.ibis_sf1.orders AS t3 - ON t2.c_custkey = t3.o_custkey - JOIN hive.ibis_sf1.lineitem AS t4 - ON t4.l_orderkey = t3.o_orderkey - JOIN hive.ibis_sf1.nation AS t5 - ON t2.c_nationkey = t5.n_nationkey - WHERE - t3.o_orderdate >= FROM_ISO8601_DATE('1993-10-01') - AND t3.o_orderdate < FROM_ISO8601_DATE('1994-01-01') - AND t4.l_returnflag = 'R' + "t16"."c_custkey", + "t16"."c_name", + "t16"."c_acctbal", + "t16"."n_name", + "t16"."c_address", + "t16"."c_phone", + "t16"."c_comment", + SUM("t16"."l_extendedprice" * ( + 1 - "t16"."l_discount" + )) AS "revenue" + FROM ( + SELECT + "t15"."c_custkey", + "t15"."c_name", + "t15"."c_address", + "t15"."c_nationkey", + "t15"."c_phone", + "t15"."c_acctbal", + "t15"."c_mktsegment", + "t15"."c_comment", + "t15"."o_orderkey", + "t15"."o_custkey", + "t15"."o_orderstatus", + "t15"."o_totalprice", + "t15"."o_orderdate", + "t15"."o_orderpriority", + "t15"."o_clerk", + "t15"."o_shippriority", + "t15"."o_comment", + "t15"."l_orderkey", + "t15"."l_partkey", + "t15"."l_suppkey", + "t15"."l_linenumber", + "t15"."l_quantity", + "t15"."l_extendedprice", + "t15"."l_discount", + "t15"."l_tax", + "t15"."l_returnflag", + "t15"."l_linestatus", + "t15"."l_shipdate", + "t15"."l_commitdate", + "t15"."l_receiptdate", + "t15"."l_shipinstruct", + "t15"."l_shipmode", + "t15"."l_comment", + "t15"."n_nationkey", + "t15"."n_name", + "t15"."n_regionkey", + "t15"."n_comment" + FROM ( + SELECT + "t9"."c_custkey", + "t9"."c_name", + "t9"."c_address", + "t9"."c_nationkey", + "t9"."c_phone", + "t9"."c_acctbal", + "t9"."c_mktsegment", + "t9"."c_comment", + "t10"."o_orderkey", + "t10"."o_custkey", + "t10"."o_orderstatus", + "t10"."o_totalprice", + "t10"."o_orderdate", + "t10"."o_orderpriority", + "t10"."o_clerk", + "t10"."o_shippriority", + "t10"."o_comment", + "t11"."l_orderkey", + "t11"."l_partkey", + "t11"."l_suppkey", + "t11"."l_linenumber", + "t11"."l_quantity", + "t11"."l_extendedprice", + "t11"."l_discount", + "t11"."l_tax", + "t11"."l_returnflag", + "t11"."l_linestatus", + "t11"."l_shipdate", + "t11"."l_commitdate", + "t11"."l_receiptdate", + "t11"."l_shipinstruct", + "t11"."l_shipmode", + "t11"."l_comment", + "t8"."n_nationkey", + "t8"."n_name", + "t8"."n_regionkey", + "t8"."n_comment" + FROM ( + SELECT + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + ) AS "t9" + INNER JOIN ( + SELECT + "t1"."o_orderkey", + "t1"."o_custkey", + "t1"."o_orderstatus", + CAST("t1"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t1"."o_orderdate", + "t1"."o_orderpriority", + "t1"."o_clerk", + "t1"."o_shippriority", + "t1"."o_comment" + FROM "orders" AS "t1" + ) AS "t10" + ON "t9"."c_custkey" = "t10"."o_custkey" + INNER JOIN ( + SELECT + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t2"."l_tax" AS DECIMAL(15, 
2)) AS "l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment" + FROM "lineitem" AS "t2" + ) AS "t11" + ON "t11"."l_orderkey" = "t10"."o_orderkey" + INNER JOIN ( + SELECT + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" + FROM "nation" AS "t3" + ) AS "t8" + ON "t9"."c_nationkey" = "t8"."n_nationkey" + ) AS "t15" + WHERE + "t15"."o_orderdate" >= FROM_ISO8601_DATE('1993-10-01') + AND "t15"."o_orderdate" < FROM_ISO8601_DATE('1994-01-01') + AND "t15"."l_returnflag" = 'R' + ) AS "t16" GROUP BY 1, 2, @@ -29,28 +167,7 @@ WITH t0 AS ( 5, 6, 7 -) -SELECT - t1.c_custkey, - t1.c_name, - t1.revenue, - t1.c_acctbal, - t1.n_name, - t1.c_address, - t1.c_phone, - t1.c_comment -FROM ( - SELECT - t0.c_custkey AS c_custkey, - t0.c_name AS c_name, - t0.revenue AS revenue, - t0.c_acctbal AS c_acctbal, - t0.n_name AS n_name, - t0.c_address AS c_address, - t0.c_phone AS c_phone, - t0.c_comment AS c_comment - FROM t0 -) AS t1 +) AS "t17" ORDER BY - t1.revenue DESC + "t17"."revenue" DESC LIMIT 20 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql index eee1dda0fb53..4d85a7eb8a1b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h11/test_tpc_h11/trino/h11.sql @@ -1,41 +1,159 @@ -WITH t0 AS ( - SELECT - t2.ps_partkey AS ps_partkey, - SUM(t2.ps_supplycost * t2.ps_availqty) AS value - FROM hive.ibis_sf1.partsupp AS t2 - JOIN hive.ibis_sf1.supplier AS t3 - ON t2.ps_suppkey = t3.s_suppkey - JOIN hive.ibis_sf1.nation AS t4 - ON t4.n_nationkey = t3.s_nationkey - WHERE - t4.n_name = 'GERMANY' - GROUP BY - 1 -) SELECT - t1.ps_partkey, - t1.value + "t13"."ps_partkey", + "t13"."value" FROM ( SELECT - t0.ps_partkey AS ps_partkey, - t0.value AS value - FROM t0 - WHERE - t0.value > ( + "t12"."ps_partkey", + SUM("t12"."ps_supplycost" * "t12"."ps_availqty") AS "value" + FROM ( + SELECT + "t11"."ps_partkey", + "t11"."ps_suppkey", + "t11"."ps_availqty", + "t11"."ps_supplycost", + "t11"."ps_comment", + "t11"."s_suppkey", + "t11"."s_name", + "t11"."s_address", + "t11"."s_nationkey", + "t11"."s_phone", + "t11"."s_acctbal", + "t11"."s_comment", + "t11"."n_nationkey", + "t11"."n_name", + "t11"."n_regionkey", + "t11"."n_comment" + FROM ( + SELECT + "t7"."ps_partkey", + "t7"."ps_suppkey", + "t7"."ps_availqty", + "t7"."ps_supplycost", + "t7"."ps_comment", + "t8"."s_suppkey", + "t8"."s_name", + "t8"."s_address", + "t8"."s_nationkey", + "t8"."s_phone", + "t8"."s_acctbal", + "t8"."s_comment", + "t6"."n_nationkey", + "t6"."n_name", + "t6"."n_regionkey", + "t6"."n_comment" + FROM ( + SELECT + "t0"."ps_partkey", + "t0"."ps_suppkey", + "t0"."ps_availqty", + CAST("t0"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t0"."ps_comment" + FROM "partsupp" AS "t0" + ) AS "t7" + INNER JOIN ( + SELECT + "t1"."s_suppkey", + "t1"."s_name", + "t1"."s_address", + "t1"."s_nationkey", + "t1"."s_phone", + CAST("t1"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t1"."s_comment" + FROM "supplier" AS "t1" + ) AS "t8" + ON "t7"."ps_suppkey" = "t8"."s_suppkey" + INNER JOIN ( + SELECT + "t2"."n_nationkey", + "t2"."n_name", + "t2"."n_regionkey", + "t2"."n_comment" + FROM "nation" AS "t2" + ) AS "t6" + ON "t6"."n_nationkey" = "t8"."s_nationkey" + ) AS "t11" + WHERE + "t11"."n_name" = 
'GERMANY' + ) AS "t12" + GROUP BY + 1 +) AS "t13" +WHERE + "t13"."value" > ( + ( SELECT - anon_1.total + SUM("t12"."ps_supplycost" * "t12"."ps_availqty") AS "Sum(Multiply(ps_supplycost, ps_availqty))" FROM ( SELECT - SUM(t2.ps_supplycost * t2.ps_availqty) AS total - FROM hive.ibis_sf1.partsupp AS t2 - JOIN hive.ibis_sf1.supplier AS t3 - ON t2.ps_suppkey = t3.s_suppkey - JOIN hive.ibis_sf1.nation AS t4 - ON t4.n_nationkey = t3.s_nationkey + "t11"."ps_partkey", + "t11"."ps_suppkey", + "t11"."ps_availqty", + "t11"."ps_supplycost", + "t11"."ps_comment", + "t11"."s_suppkey", + "t11"."s_name", + "t11"."s_address", + "t11"."s_nationkey", + "t11"."s_phone", + "t11"."s_acctbal", + "t11"."s_comment", + "t11"."n_nationkey", + "t11"."n_name", + "t11"."n_regionkey", + "t11"."n_comment" + FROM ( + SELECT + "t7"."ps_partkey", + "t7"."ps_suppkey", + "t7"."ps_availqty", + "t7"."ps_supplycost", + "t7"."ps_comment", + "t8"."s_suppkey", + "t8"."s_name", + "t8"."s_address", + "t8"."s_nationkey", + "t8"."s_phone", + "t8"."s_acctbal", + "t8"."s_comment", + "t6"."n_nationkey", + "t6"."n_name", + "t6"."n_regionkey", + "t6"."n_comment" + FROM ( + SELECT + "t0"."ps_partkey", + "t0"."ps_suppkey", + "t0"."ps_availqty", + CAST("t0"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t0"."ps_comment" + FROM "partsupp" AS "t0" + ) AS "t7" + INNER JOIN ( + SELECT + "t1"."s_suppkey", + "t1"."s_name", + "t1"."s_address", + "t1"."s_nationkey", + "t1"."s_phone", + CAST("t1"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t1"."s_comment" + FROM "supplier" AS "t1" + ) AS "t8" + ON "t7"."ps_suppkey" = "t8"."s_suppkey" + INNER JOIN ( + SELECT + "t2"."n_nationkey", + "t2"."n_name", + "t2"."n_regionkey", + "t2"."n_comment" + FROM "nation" AS "t2" + ) AS "t6" + ON "t6"."n_nationkey" = "t8"."s_nationkey" + ) AS "t11" WHERE - t4.n_name = 'GERMANY' - ) AS anon_1 - ) * 0.0001 -) AS t1 + "t11"."n_name" = 'GERMANY' + ) AS "t12" + ) * CAST(0.0001 AS DOUBLE) + ) ORDER BY - t1.value DESC \ No newline at end of file + "t13"."value" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql index f504a7a81a8f..444f5d44b978 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h12/test_tpc_h12/trino/h12.sql @@ -1,23 +1,114 @@ SELECT - t0.l_shipmode, - t0.high_line_count, - t0.low_line_count + "t9"."l_shipmode", + "t9"."high_line_count", + "t9"."low_line_count" FROM ( SELECT - t2.l_shipmode AS l_shipmode, - SUM(CASE t1.o_orderpriority WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END) AS high_line_count, - SUM(CASE t1.o_orderpriority WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END) AS low_line_count - FROM hive.ibis_sf1.orders AS t1 - JOIN hive.ibis_sf1.lineitem AS t2 - ON t1.o_orderkey = t2.l_orderkey - WHERE - t2.l_shipmode IN ('MAIL', 'SHIP') - AND t2.l_commitdate < t2.l_receiptdate - AND t2.l_shipdate < t2.l_commitdate - AND t2.l_receiptdate >= FROM_ISO8601_DATE('1994-01-01') - AND t2.l_receiptdate < FROM_ISO8601_DATE('1995-01-01') + "t8"."l_shipmode", + SUM( + CASE "t8"."o_orderpriority" WHEN '1-URGENT' THEN 1 WHEN '2-HIGH' THEN 1 ELSE 0 END + ) AS "high_line_count", + SUM( + CASE "t8"."o_orderpriority" WHEN '1-URGENT' THEN 0 WHEN '2-HIGH' THEN 0 ELSE 1 END + ) AS "low_line_count" + FROM ( + SELECT + "t7"."o_orderkey", + "t7"."o_custkey", + "t7"."o_orderstatus", + "t7"."o_totalprice", + "t7"."o_orderdate", + 
"t7"."o_orderpriority", + "t7"."o_clerk", + "t7"."o_shippriority", + "t7"."o_comment", + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + "t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment" + FROM ( + SELECT + "t4"."o_orderkey", + "t4"."o_custkey", + "t4"."o_orderstatus", + "t4"."o_totalprice", + "t4"."o_orderdate", + "t4"."o_orderpriority", + "t4"."o_clerk", + "t4"."o_shippriority", + "t4"."o_comment", + "t5"."l_orderkey", + "t5"."l_partkey", + "t5"."l_suppkey", + "t5"."l_linenumber", + "t5"."l_quantity", + "t5"."l_extendedprice", + "t5"."l_discount", + "t5"."l_tax", + "t5"."l_returnflag", + "t5"."l_linestatus", + "t5"."l_shipdate", + "t5"."l_commitdate", + "t5"."l_receiptdate", + "t5"."l_shipinstruct", + "t5"."l_shipmode", + "t5"."l_comment" + FROM ( + SELECT + "t0"."o_orderkey", + "t0"."o_custkey", + "t0"."o_orderstatus", + CAST("t0"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t0"."o_orderdate", + "t0"."o_orderpriority", + "t0"."o_clerk", + "t0"."o_shippriority", + "t0"."o_comment" + FROM "orders" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + ) AS "t5" + ON "t4"."o_orderkey" = "t5"."l_orderkey" + ) AS "t7" + WHERE + "t7"."l_shipmode" IN ('MAIL', 'SHIP') + AND "t7"."l_commitdate" < "t7"."l_receiptdate" + AND "t7"."l_shipdate" < "t7"."l_commitdate" + AND "t7"."l_receiptdate" >= FROM_ISO8601_DATE('1994-01-01') + AND "t7"."l_receiptdate" < FROM_ISO8601_DATE('1995-01-01') + ) AS "t8" GROUP BY 1 -) AS t0 +) AS "t9" ORDER BY - t0.l_shipmode ASC \ No newline at end of file + "t9"."l_shipmode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql index 5e9327de74bb..dff39fad956b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h13/test_tpc_h13/trino/h13.sql @@ -1,24 +1,69 @@ -WITH t0 AS ( - SELECT - t2.c_custkey AS c_custkey, - COUNT(t3.o_orderkey) AS c_count - FROM hive.ibis_sf1.customer AS t2 - LEFT OUTER JOIN hive.ibis_sf1.orders AS t3 - ON t2.c_custkey = t3.o_custkey AND NOT t3.o_comment LIKE '%special%requests%' - GROUP BY - 1 -) SELECT - t1.c_count, - t1.custdist + "t9"."c_count", + "t9"."custdist" FROM ( SELECT - t0.c_count AS c_count, - COUNT(*) AS custdist - FROM t0 + "t8"."c_count", + COUNT(*) AS "custdist" + FROM ( + SELECT + "t7"."c_custkey", + COUNT("t7"."o_orderkey") AS "c_count" + FROM ( + SELECT + "t4"."c_custkey", + "t4"."c_name", + "t4"."c_address", + "t4"."c_nationkey", + "t4"."c_phone", + "t4"."c_acctbal", + "t4"."c_mktsegment", + "t4"."c_comment", + "t5"."o_orderkey", + "t5"."o_custkey", + "t5"."o_orderstatus", + "t5"."o_totalprice", + "t5"."o_orderdate", + "t5"."o_orderpriority", + 
"t5"."o_clerk", + "t5"."o_shippriority", + "t5"."o_comment" + FROM ( + SELECT + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + ) AS "t4" + LEFT OUTER JOIN ( + SELECT + "t1"."o_orderkey", + "t1"."o_custkey", + "t1"."o_orderstatus", + CAST("t1"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t1"."o_orderdate", + "t1"."o_orderpriority", + "t1"."o_clerk", + "t1"."o_shippriority", + "t1"."o_comment" + FROM "orders" AS "t1" + ) AS "t5" + ON "t4"."c_custkey" = "t5"."o_custkey" + AND NOT ( + "t5"."o_comment" LIKE '%special%requests%' + ) + ) AS "t7" + GROUP BY + 1 + ) AS "t8" GROUP BY 1 -) AS t1 +) AS "t9" ORDER BY - t1.custdist DESC, - t1.c_count DESC \ No newline at end of file + "t9"."custdist" DESC, + "t9"."c_count" DESC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql index aa6ce1815a1f..be7f2a998089 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h14/test_tpc_h14/trino/h14.sql @@ -1,14 +1,103 @@ SELECT - ( - SUM(IF(t1.p_type LIKE 'PROMO%', t0.l_extendedprice * ( - 1 - t0.l_discount - ), 0)) * 100 - ) / SUM(t0.l_extendedprice * ( - 1 - t0.l_discount - )) AS promo_revenue -FROM hive.ibis_sf1.lineitem AS t0 -JOIN hive.ibis_sf1.part AS t1 - ON t0.l_partkey = t1.p_partkey -WHERE - t0.l_shipdate >= FROM_ISO8601_DATE('1995-09-01') - AND t0.l_shipdate < FROM_ISO8601_DATE('1995-10-01') \ No newline at end of file + CAST(( + SUM( + IF("t8"."p_type" LIKE 'PROMO%', "t8"."l_extendedprice" * ( + 1 - "t8"."l_discount" + ), 0) + ) * 100 + ) AS DOUBLE) / SUM("t8"."l_extendedprice" * ( + 1 - "t8"."l_discount" + )) AS "promo_revenue" +FROM ( + SELECT + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + "t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment", + "t7"."p_partkey", + "t7"."p_name", + "t7"."p_mfgr", + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + "t7"."p_container", + "t7"."p_retailprice", + "t7"."p_comment" + FROM ( + SELECT + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment", + "t5"."p_partkey", + "t5"."p_name", + "t5"."p_mfgr", + "t5"."p_brand", + "t5"."p_type", + "t5"."p_size", + "t5"."p_container", + "t5"."p_retailprice", + "t5"."p_comment" + FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM 
"lineitem" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t1"."p_partkey", + "t1"."p_name", + "t1"."p_mfgr", + "t1"."p_brand", + "t1"."p_type", + "t1"."p_size", + "t1"."p_container", + CAST("t1"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", + "t1"."p_comment" + FROM "part" AS "t1" + ) AS "t5" + ON "t4"."l_partkey" = "t5"."p_partkey" + ) AS "t7" + WHERE + "t7"."l_shipdate" >= FROM_ISO8601_DATE('1995-09-01') + AND "t7"."l_shipdate" < FROM_ISO8601_DATE('1995-10-01') +) AS "t8" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql index 1f1b58bc18e6..423ef0b3245b 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h15/test_tpc_h15/trino/h15.sql @@ -1,54 +1,125 @@ -WITH t0 AS ( - SELECT - t3.l_suppkey AS l_suppkey, - SUM(t3.l_extendedprice * ( - 1 - t3.l_discount - )) AS total_revenue - FROM hive.ibis_sf1.lineitem AS t3 - WHERE - t3.l_shipdate >= FROM_ISO8601_DATE('1996-01-01') - AND t3.l_shipdate < FROM_ISO8601_DATE('1996-04-01') - GROUP BY - 1 -), t1 AS ( - SELECT - t3.s_suppkey AS s_suppkey, - t3.s_name AS s_name, - t3.s_address AS s_address, - t3.s_nationkey AS s_nationkey, - t3.s_phone AS s_phone, - t3.s_acctbal AS s_acctbal, - t3.s_comment AS s_comment, - t0.l_suppkey AS l_suppkey, - t0.total_revenue AS total_revenue - FROM hive.ibis_sf1.supplier AS t3 - JOIN t0 - ON t3.s_suppkey = t0.l_suppkey - WHERE - t0.total_revenue = ( - SELECT - MAX(t0.total_revenue) AS "Max(total_revenue)" - FROM t0 - ) -) SELECT - t2.s_suppkey, - t2.s_name, - t2.s_address, - t2.s_phone, - t2.total_revenue + "t8"."s_suppkey", + "t8"."s_name", + "t8"."s_address", + "t8"."s_phone", + "t8"."total_revenue" FROM ( SELECT - t1.s_suppkey AS s_suppkey, - t1.s_name AS s_name, - t1.s_address AS s_address, - t1.s_nationkey AS s_nationkey, - t1.s_phone AS s_phone, - t1.s_acctbal AS s_acctbal, - t1.s_comment AS s_comment, - t1.l_suppkey AS l_suppkey, - t1.total_revenue AS total_revenue - FROM t1 - ORDER BY - t1.s_suppkey ASC -) AS t2 \ No newline at end of file + "t4"."s_suppkey", + "t4"."s_name", + "t4"."s_address", + "t4"."s_nationkey", + "t4"."s_phone", + "t4"."s_acctbal", + "t4"."s_comment", + "t6"."l_suppkey", + "t6"."total_revenue" + FROM ( + SELECT + "t0"."s_suppkey", + "t0"."s_name", + "t0"."s_address", + "t0"."s_nationkey", + "t0"."s_phone", + CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t0"."s_comment" + FROM "supplier" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t3"."l_suppkey", + SUM("t3"."l_extendedprice" * ( + 1 - "t3"."l_discount" + )) AS "total_revenue" + FROM ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + WHERE + "t1"."l_shipdate" >= FROM_ISO8601_DATE('1996-01-01') + AND "t1"."l_shipdate" < FROM_ISO8601_DATE('1996-04-01') + ) AS "t3" + GROUP BY + 1 + ) AS "t6" + ON "t4"."s_suppkey" = "t6"."l_suppkey" +) AS "t8" +WHERE + "t8"."total_revenue" = ( + SELECT + MAX("t8"."total_revenue") AS 
"Max(total_revenue)" + FROM ( + SELECT + "t4"."s_suppkey", + "t4"."s_name", + "t4"."s_address", + "t4"."s_nationkey", + "t4"."s_phone", + "t4"."s_acctbal", + "t4"."s_comment", + "t6"."l_suppkey", + "t6"."total_revenue" + FROM ( + SELECT + "t0"."s_suppkey", + "t0"."s_name", + "t0"."s_address", + "t0"."s_nationkey", + "t0"."s_phone", + CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t0"."s_comment" + FROM "supplier" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t3"."l_suppkey", + SUM("t3"."l_extendedprice" * ( + 1 - "t3"."l_discount" + )) AS "total_revenue" + FROM ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + WHERE + "t1"."l_shipdate" >= FROM_ISO8601_DATE('1996-01-01') + AND "t1"."l_shipdate" < FROM_ISO8601_DATE('1996-04-01') + ) AS "t3" + GROUP BY + 1 + ) AS "t6" + ON "t4"."s_suppkey" = "t6"."l_suppkey" + ) AS "t8" + ) +ORDER BY + "t8"."s_suppkey" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql index 3eab28115bdc..f1681099f881 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h16/test_tpc_h16/trino/h16.sql @@ -1,47 +1,93 @@ SELECT - t0.p_brand, - t0.p_type, - t0.p_size, - t0.supplier_cnt + "t11"."p_brand", + "t11"."p_type", + "t11"."p_size", + "t11"."supplier_cnt" FROM ( SELECT - t2.p_brand AS p_brand, - t2.p_type AS p_type, - t2.p_size AS p_size, - COUNT(DISTINCT t1.ps_suppkey) AS supplier_cnt - FROM hive.ibis_sf1.partsupp AS t1 - JOIN hive.ibis_sf1.part AS t2 - ON t2.p_partkey = t1.ps_partkey - WHERE - t2.p_brand <> 'Brand#45' - AND NOT t2.p_type LIKE 'MEDIUM POLISHED%' - AND t2.p_size IN (49, 14, 23, 45, 19, 3, 36, 9) - AND ( - NOT t1.ps_suppkey IN ( + "t10"."p_brand", + "t10"."p_type", + "t10"."p_size", + COUNT(DISTINCT "t10"."ps_suppkey") AS "supplier_cnt" + FROM ( + SELECT + "t9"."ps_partkey", + "t9"."ps_suppkey", + "t9"."ps_availqty", + "t9"."ps_supplycost", + "t9"."ps_comment", + "t9"."p_partkey", + "t9"."p_name", + "t9"."p_mfgr", + "t9"."p_brand", + "t9"."p_type", + "t9"."p_size", + "t9"."p_container", + "t9"."p_retailprice", + "t9"."p_comment" + FROM ( + SELECT + "t6"."ps_partkey", + "t6"."ps_suppkey", + "t6"."ps_availqty", + "t6"."ps_supplycost", + "t6"."ps_comment", + "t7"."p_partkey", + "t7"."p_name", + "t7"."p_mfgr", + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + "t7"."p_container", + "t7"."p_retailprice", + "t7"."p_comment" + FROM ( SELECT - t3.s_suppkey - FROM ( + "t0"."ps_partkey", + "t0"."ps_suppkey", + "t0"."ps_availqty", + CAST("t0"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t0"."ps_comment" + FROM "partsupp" AS "t0" + ) AS "t6" + INNER JOIN ( + SELECT + "t2"."p_partkey", + "t2"."p_name", + "t2"."p_mfgr", + "t2"."p_brand", + "t2"."p_type", + "t2"."p_size", + "t2"."p_container", + CAST("t2"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", + "t2"."p_comment" + FROM "part" AS "t2" + ) AS "t7" + ON "t7"."p_partkey" = 
"t6"."ps_partkey" + ) AS "t9" + WHERE + "t9"."p_brand" <> 'Brand#45' + AND NOT ( + "t9"."p_type" LIKE 'MEDIUM POLISHED%' + ) + AND "t9"."p_size" IN (49, 14, 23, 45, 19, 3, 36, 9) + AND NOT ( + "t9"."ps_suppkey" IN ( SELECT - t4.s_suppkey AS s_suppkey, - t4.s_name AS s_name, - t4.s_address AS s_address, - t4.s_nationkey AS s_nationkey, - t4.s_phone AS s_phone, - t4.s_acctbal AS s_acctbal, - t4.s_comment AS s_comment - FROM hive.ibis_sf1.supplier AS t4 + "t1"."s_suppkey" + FROM "supplier" AS "t1" WHERE - t4.s_comment LIKE '%Customer%Complaints%' - ) AS t3 + "t1"."s_comment" LIKE '%Customer%Complaints%' + ) ) - ) + ) AS "t10" GROUP BY 1, 2, 3 -) AS t0 +) AS "t11" ORDER BY - t0.supplier_cnt DESC, - t0.p_brand ASC, - t0.p_type ASC, - t0.p_size ASC \ No newline at end of file + "t11"."supplier_cnt" DESC, + "t11"."p_brand" ASC, + "t11"."p_type" ASC, + "t11"."p_size" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql index a964aea72736..3c5f39dcc1d8 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h17/test_tpc_h17/trino/h17.sql @@ -1,15 +1,123 @@ SELECT - SUM(t0.l_extendedprice) / 7.0 AS avg_yearly -FROM hive.ibis_sf1.lineitem AS t0 -JOIN hive.ibis_sf1.part AS t1 - ON t1.p_partkey = t0.l_partkey -WHERE - t1.p_brand = 'Brand#23' - AND t1.p_container = 'MED BOX' - AND t0.l_quantity < ( + SUM("t10"."l_extendedprice") / CAST(7.0 AS DOUBLE) AS "avg_yearly" +FROM ( + SELECT + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + "t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment", + "t7"."p_partkey", + "t7"."p_name", + "t7"."p_mfgr", + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + "t7"."p_container", + "t7"."p_retailprice", + "t7"."p_comment" + FROM ( SELECT - AVG(t0.l_quantity) AS "Mean(l_quantity)" - FROM hive.ibis_sf1.lineitem AS t0 - WHERE - t0.l_partkey = t1.p_partkey - ) * 0.2 \ No newline at end of file + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment", + "t5"."p_partkey", + "t5"."p_name", + "t5"."p_mfgr", + "t5"."p_brand", + "t5"."p_type", + "t5"."p_size", + "t5"."p_container", + "t5"."p_retailprice", + "t5"."p_comment" + FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t1"."p_partkey", + "t1"."p_name", + "t1"."p_mfgr", + "t1"."p_brand", + "t1"."p_type", + "t1"."p_size", + "t1"."p_container", + CAST("t1"."p_retailprice" 
AS DECIMAL(15, 2)) AS "p_retailprice", + "t1"."p_comment" + FROM "part" AS "t1" + ) AS "t5" + ON "t5"."p_partkey" = "t4"."l_partkey" + ) AS "t7" + WHERE + "t7"."p_brand" = 'Brand#23' + AND "t7"."p_container" = 'MED BOX' + AND "t7"."l_quantity" < ( + ( + SELECT + AVG("t8"."l_quantity") AS "Mean(l_quantity)" + FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + WHERE + "t0"."l_partkey" = "t7"."p_partkey" + ) AS "t8" + ) * CAST(0.2 AS DOUBLE) + ) +) AS "t10" \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql index be697b0061d3..f84e31100199 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h18/test_tpc_h18/trino/h18.sql @@ -1,52 +1,179 @@ -WITH t0 AS ( - SELECT - t2.l_orderkey AS l_orderkey, - SUM(t2.l_quantity) AS qty_sum - FROM hive.ibis_sf1.lineitem AS t2 - GROUP BY - 1 -) SELECT - t1.c_name, - t1.c_custkey, - t1.o_orderkey, - t1.o_orderdate, - t1.o_totalprice, - t1.sum_qty + "t15"."c_name", + "t15"."c_custkey", + "t15"."o_orderkey", + "t15"."o_orderdate", + "t15"."o_totalprice", + "t15"."sum_qty" FROM ( SELECT - t2.c_name AS c_name, - t2.c_custkey AS c_custkey, - t3.o_orderkey AS o_orderkey, - t3.o_orderdate AS o_orderdate, - t3.o_totalprice AS o_totalprice, - SUM(t4.l_quantity) AS sum_qty - FROM hive.ibis_sf1.customer AS t2 - JOIN hive.ibis_sf1.orders AS t3 - ON t2.c_custkey = t3.o_custkey - JOIN hive.ibis_sf1.lineitem AS t4 - ON t3.o_orderkey = t4.l_orderkey - WHERE - t3.o_orderkey IN ( + "t14"."c_name", + "t14"."c_custkey", + "t14"."o_orderkey", + "t14"."o_orderdate", + "t14"."o_totalprice", + SUM("t14"."l_quantity") AS "sum_qty" + FROM ( + SELECT + "t12"."c_custkey", + "t12"."c_name", + "t12"."c_address", + "t12"."c_nationkey", + "t12"."c_phone", + "t12"."c_acctbal", + "t12"."c_mktsegment", + "t12"."c_comment", + "t12"."o_orderkey", + "t12"."o_custkey", + "t12"."o_orderstatus", + "t12"."o_totalprice", + "t12"."o_orderdate", + "t12"."o_orderpriority", + "t12"."o_clerk", + "t12"."o_shippriority", + "t12"."o_comment", + "t12"."l_orderkey", + "t12"."l_partkey", + "t12"."l_suppkey", + "t12"."l_linenumber", + "t12"."l_quantity", + "t12"."l_extendedprice", + "t12"."l_discount", + "t12"."l_tax", + "t12"."l_returnflag", + "t12"."l_linestatus", + "t12"."l_shipdate", + "t12"."l_commitdate", + "t12"."l_receiptdate", + "t12"."l_shipinstruct", + "t12"."l_shipmode", + "t12"."l_comment" + FROM ( SELECT - t5.l_orderkey + "t6"."c_custkey", + "t6"."c_name", + "t6"."c_address", + "t6"."c_nationkey", + "t6"."c_phone", + "t6"."c_acctbal", + "t6"."c_mktsegment", + "t6"."c_comment", + "t7"."o_orderkey", + "t7"."o_custkey", + "t7"."o_orderstatus", + "t7"."o_totalprice", + "t7"."o_orderdate", + "t7"."o_orderpriority", + "t7"."o_clerk", + "t7"."o_shippriority", + "t7"."o_comment", + "t8"."l_orderkey", + "t8"."l_partkey", + "t8"."l_suppkey", + "t8"."l_linenumber", + "t8"."l_quantity", + "t8"."l_extendedprice", + 
"t8"."l_discount", + "t8"."l_tax", + "t8"."l_returnflag", + "t8"."l_linestatus", + "t8"."l_shipdate", + "t8"."l_commitdate", + "t8"."l_receiptdate", + "t8"."l_shipinstruct", + "t8"."l_shipmode", + "t8"."l_comment" FROM ( SELECT - t0.l_orderkey AS l_orderkey, - t0.qty_sum AS qty_sum - FROM t0 + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + ) AS "t6" + INNER JOIN ( + SELECT + "t1"."o_orderkey", + "t1"."o_custkey", + "t1"."o_orderstatus", + CAST("t1"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t1"."o_orderdate", + "t1"."o_orderpriority", + "t1"."o_clerk", + "t1"."o_shippriority", + "t1"."o_comment" + FROM "orders" AS "t1" + ) AS "t7" + ON "t6"."c_custkey" = "t7"."o_custkey" + INNER JOIN ( + SELECT + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t2"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment" + FROM "lineitem" AS "t2" + ) AS "t8" + ON "t7"."o_orderkey" = "t8"."l_orderkey" + ) AS "t12" + WHERE + "t12"."o_orderkey" IN ( + SELECT + "t9"."l_orderkey" + FROM ( + SELECT + "t5"."l_orderkey", + SUM("t5"."l_quantity") AS "qty_sum" + FROM ( + SELECT + "t2"."l_orderkey", + "t2"."l_partkey", + "t2"."l_suppkey", + "t2"."l_linenumber", + CAST("t2"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t2"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t2"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t2"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t2"."l_returnflag", + "t2"."l_linestatus", + "t2"."l_shipdate", + "t2"."l_commitdate", + "t2"."l_receiptdate", + "t2"."l_shipinstruct", + "t2"."l_shipmode", + "t2"."l_comment" + FROM "lineitem" AS "t2" + ) AS "t5" + GROUP BY + 1 + ) AS "t9" WHERE - t0.qty_sum > 300 - ) AS t5 - ) + "t9"."qty_sum" > 300 + ) + ) AS "t14" GROUP BY 1, 2, 3, 4, 5 -) AS t1 +) AS "t15" ORDER BY - t1.o_totalprice DESC, - t1.o_orderdate ASC + "t15"."o_totalprice" DESC, + "t15"."o_orderdate" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql index 5c9774fb10b8..033059993529 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h19/test_tpc_h19/trino/h19.sql @@ -1,29 +1,178 @@ SELECT - SUM(t0.l_extendedprice * ( - 1 - t0.l_discount - )) AS revenue -FROM hive.ibis_sf1.lineitem AS t0 -JOIN hive.ibis_sf1.part AS t1 - ON t1.p_partkey = t0.l_partkey -WHERE - t1.p_brand = 'Brand#12' - AND t1.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') - AND t0.l_quantity >= 1 - AND t0.l_quantity <= 11 - AND t1.p_size BETWEEN 1 AND 5 - AND t0.l_shipmode IN ('AIR', 'AIR REG') - AND t0.l_shipinstruct = 'DELIVER IN PERSON' - OR t1.p_brand = 'Brand#23' - AND t1.p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') - AND t0.l_quantity >= 10 - AND t0.l_quantity <= 20 - AND t1.p_size BETWEEN 1 AND 10 - AND t0.l_shipmode IN ('AIR', 'AIR REG') - AND 
t0.l_shipinstruct = 'DELIVER IN PERSON' - OR t1.p_brand = 'Brand#34' - AND t1.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') - AND t0.l_quantity >= 20 - AND t0.l_quantity <= 30 - AND t1.p_size BETWEEN 1 AND 15 - AND t0.l_shipmode IN ('AIR', 'AIR REG') - AND t0.l_shipinstruct = 'DELIVER IN PERSON' \ No newline at end of file + SUM("t8"."l_extendedprice" * ( + 1 - "t8"."l_discount" + )) AS "revenue" +FROM ( + SELECT + "t7"."l_orderkey", + "t7"."l_partkey", + "t7"."l_suppkey", + "t7"."l_linenumber", + "t7"."l_quantity", + "t7"."l_extendedprice", + "t7"."l_discount", + "t7"."l_tax", + "t7"."l_returnflag", + "t7"."l_linestatus", + "t7"."l_shipdate", + "t7"."l_commitdate", + "t7"."l_receiptdate", + "t7"."l_shipinstruct", + "t7"."l_shipmode", + "t7"."l_comment", + "t7"."p_partkey", + "t7"."p_name", + "t7"."p_mfgr", + "t7"."p_brand", + "t7"."p_type", + "t7"."p_size", + "t7"."p_container", + "t7"."p_retailprice", + "t7"."p_comment" + FROM ( + SELECT + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + "t4"."l_quantity", + "t4"."l_extendedprice", + "t4"."l_discount", + "t4"."l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment", + "t5"."p_partkey", + "t5"."p_name", + "t5"."p_mfgr", + "t5"."p_brand", + "t5"."p_type", + "t5"."p_size", + "t5"."p_container", + "t5"."p_retailprice", + "t5"."p_comment" + FROM ( + SELECT + "t0"."l_orderkey", + "t0"."l_partkey", + "t0"."l_suppkey", + "t0"."l_linenumber", + CAST("t0"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t0"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t0"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t0"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t0"."l_returnflag", + "t0"."l_linestatus", + "t0"."l_shipdate", + "t0"."l_commitdate", + "t0"."l_receiptdate", + "t0"."l_shipinstruct", + "t0"."l_shipmode", + "t0"."l_comment" + FROM "lineitem" AS "t0" + ) AS "t4" + INNER JOIN ( + SELECT + "t1"."p_partkey", + "t1"."p_name", + "t1"."p_mfgr", + "t1"."p_brand", + "t1"."p_type", + "t1"."p_size", + "t1"."p_container", + CAST("t1"."p_retailprice" AS DECIMAL(15, 2)) AS "p_retailprice", + "t1"."p_comment" + FROM "part" AS "t1" + ) AS "t5" + ON "t5"."p_partkey" = "t4"."l_partkey" + ) AS "t7" + WHERE + ( + ( + ( + ( + ( + ( + ( + ( + "t7"."p_brand" = 'Brand#12' + ) + AND "t7"."p_container" IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + ) + AND ( + "t7"."l_quantity" >= 1 + ) + ) + AND ( + "t7"."l_quantity" <= 11 + ) + ) + AND "t7"."p_size" BETWEEN 1 AND 5 + ) + AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') + ) + AND ( + "t7"."l_shipinstruct" = 'DELIVER IN PERSON' + ) + ) + OR ( + ( + ( + ( + ( + ( + ( + "t7"."p_brand" = 'Brand#23' + ) + AND "t7"."p_container" IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') + ) + AND ( + "t7"."l_quantity" >= 10 + ) + ) + AND ( + "t7"."l_quantity" <= 20 + ) + ) + AND "t7"."p_size" BETWEEN 1 AND 10 + ) + AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') + ) + AND ( + "t7"."l_shipinstruct" = 'DELIVER IN PERSON' + ) + ) + ) + OR ( + ( + ( + ( + ( + ( + ( + "t7"."p_brand" = 'Brand#34' + ) + AND "t7"."p_container" IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + ) + AND ( + "t7"."l_quantity" >= 20 + ) + ) + AND ( + "t7"."l_quantity" <= 30 + ) + ) + AND "t7"."p_size" BETWEEN 1 AND 15 + ) + AND "t7"."l_shipmode" IN ('AIR', 'AIR REG') + ) + AND ( + "t7"."l_shipinstruct" = 'DELIVER IN PERSON' + ) + ) +) AS "t8" \ No newline at end of 
file diff --git a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql index ae1f1a8c519e..22520f016c64 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h20/test_tpc_h20/trino/h20.sql @@ -1,73 +1,93 @@ -WITH t0 AS ( +SELECT + "t13"."s_name", + "t13"."s_address" +FROM ( SELECT - t2.s_suppkey AS s_suppkey, - t2.s_name AS s_name, - t2.s_address AS s_address, - t2.s_nationkey AS s_nationkey, - t2.s_phone AS s_phone, - t2.s_acctbal AS s_acctbal, - t2.s_comment AS s_comment, - t3.n_nationkey AS n_nationkey, - t3.n_name AS n_name, - t3.n_regionkey AS n_regionkey, - t3.n_comment AS n_comment - FROM hive.ibis_sf1.supplier AS t2 - JOIN hive.ibis_sf1.nation AS t3 - ON t2.s_nationkey = t3.n_nationkey - WHERE - t3.n_name = 'CANADA' - AND t2.s_suppkey IN ( + "t10"."s_suppkey", + "t10"."s_name", + "t10"."s_address", + "t10"."s_nationkey", + "t10"."s_phone", + "t10"."s_acctbal", + "t10"."s_comment", + "t8"."n_nationkey", + "t8"."n_name", + "t8"."n_regionkey", + "t8"."n_comment" + FROM ( + SELECT + "t0"."s_suppkey", + "t0"."s_name", + "t0"."s_address", + "t0"."s_nationkey", + "t0"."s_phone", + CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t0"."s_comment" + FROM "supplier" AS "t0" + ) AS "t10" + INNER JOIN ( + SELECT + "t2"."n_nationkey", + "t2"."n_name", + "t2"."n_regionkey", + "t2"."n_comment" + FROM "nation" AS "t2" + ) AS "t8" + ON "t10"."s_nationkey" = "t8"."n_nationkey" +) AS "t13" +WHERE + "t13"."n_name" = 'CANADA' + AND "t13"."s_suppkey" IN ( + SELECT + "t7"."ps_suppkey" + FROM ( SELECT - t4.ps_suppkey - FROM ( + "t1"."ps_partkey", + "t1"."ps_suppkey", + "t1"."ps_availqty", + CAST("t1"."ps_supplycost" AS DECIMAL(15, 2)) AS "ps_supplycost", + "t1"."ps_comment" + FROM "partsupp" AS "t1" + ) AS "t7" + WHERE + "t7"."ps_partkey" IN ( SELECT - t5.ps_partkey AS ps_partkey, - t5.ps_suppkey AS ps_suppkey, - t5.ps_availqty AS ps_availqty, - t5.ps_supplycost AS ps_supplycost, - t5.ps_comment AS ps_comment - FROM hive.ibis_sf1.partsupp AS t5 + "t3"."p_partkey" + FROM "part" AS "t3" WHERE - t5.ps_partkey IN ( - SELECT - t6.p_partkey - FROM ( - SELECT - t7.p_partkey AS p_partkey, - t7.p_name AS p_name, - t7.p_mfgr AS p_mfgr, - t7.p_brand AS p_brand, - t7.p_type AS p_type, - t7.p_size AS p_size, - t7.p_container AS p_container, - t7.p_retailprice AS p_retailprice, - t7.p_comment AS p_comment - FROM hive.ibis_sf1.part AS t7 - WHERE - t7.p_name LIKE 'forest%' - ) AS t6 - ) - AND t5.ps_availqty > ( + "t3"."p_name" LIKE 'forest%' + ) + AND "t7"."ps_availqty" > ( + ( + SELECT + SUM("t11"."l_quantity") AS "Sum(l_quantity)" + FROM ( SELECT - SUM(t6.l_quantity) AS "Sum(l_quantity)" - FROM hive.ibis_sf1.lineitem AS t6 + "t4"."l_orderkey", + "t4"."l_partkey", + "t4"."l_suppkey", + "t4"."l_linenumber", + CAST("t4"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t4"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t4"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t4"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t4"."l_returnflag", + "t4"."l_linestatus", + "t4"."l_shipdate", + "t4"."l_commitdate", + "t4"."l_receiptdate", + "t4"."l_shipinstruct", + "t4"."l_shipmode", + "t4"."l_comment" + FROM "lineitem" AS "t4" WHERE - t6.l_partkey = t5.ps_partkey - AND t6.l_suppkey = t5.ps_suppkey - AND t6.l_shipdate >= FROM_ISO8601_DATE('1994-01-01') - AND t6.l_shipdate < FROM_ISO8601_DATE('1995-01-01') - ) * 0.5 - ) AS t4 - 
) -) -SELECT - t1.s_name, - t1.s_address -FROM ( - SELECT - t0.s_name AS s_name, - t0.s_address AS s_address - FROM t0 -) AS t1 + "t4"."l_partkey" = "t7"."ps_partkey" + AND "t4"."l_suppkey" = "t7"."ps_suppkey" + AND "t4"."l_shipdate" >= FROM_ISO8601_DATE('1994-01-01') + AND "t4"."l_shipdate" < FROM_ISO8601_DATE('1995-01-01') + ) AS "t11" + ) * CAST(0.5 AS DOUBLE) + ) + ) ORDER BY - t1.s_name ASC \ No newline at end of file + "t13"."s_name" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql index 7e8f9a143f61..c7c0686a5465 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h21/test_tpc_h21/trino/h21.sql @@ -1,56 +1,162 @@ -WITH t0 AS ( - SELECT - t3.l_orderkey AS l1_orderkey, - t4.o_orderstatus AS o_orderstatus, - t3.l_receiptdate AS l_receiptdate, - t3.l_commitdate AS l_commitdate, - t3.l_suppkey AS l1_suppkey, - t2.s_name AS s_name, - t5.n_name AS n_name - FROM hive.ibis_sf1.supplier AS t2 - JOIN hive.ibis_sf1.lineitem AS t3 - ON t2.s_suppkey = t3.l_suppkey - JOIN hive.ibis_sf1.orders AS t4 - ON t4.o_orderkey = t3.l_orderkey - JOIN hive.ibis_sf1.nation AS t5 - ON t2.s_nationkey = t5.n_nationkey -) SELECT - t1.s_name, - t1.numwait + "t21"."s_name", + "t21"."numwait" FROM ( SELECT - t0.s_name AS s_name, - COUNT(*) AS numwait - FROM t0 - WHERE - t0.o_orderstatus = 'F' - AND t0.l_receiptdate > t0.l_commitdate - AND t0.n_name = 'SAUDI ARABIA' - AND ( - EXISTS( + "t20"."s_name", + COUNT(*) AS "numwait" + FROM ( + SELECT + "t17"."l1_orderkey", + "t17"."o_orderstatus", + "t17"."l_receiptdate", + "t17"."l_commitdate", + "t17"."l1_suppkey", + "t17"."s_name", + "t17"."n_name" + FROM ( + SELECT + "t10"."l_orderkey" AS "l1_orderkey", + "t13"."o_orderstatus", + "t10"."l_receiptdate", + "t10"."l_commitdate", + "t10"."l_suppkey" AS "l1_suppkey", + "t9"."s_name", + "t8"."n_name" + FROM ( SELECT - 1 AS anon_1 - FROM hive.ibis_sf1.lineitem AS t2 - WHERE - t2.l_orderkey = t0.l1_orderkey AND t2.l_suppkey <> t0.l1_suppkey - ) - ) - AND NOT ( - EXISTS( + "t0"."s_suppkey", + "t0"."s_name", + "t0"."s_address", + "t0"."s_nationkey", + "t0"."s_phone", + CAST("t0"."s_acctbal" AS DECIMAL(15, 2)) AS "s_acctbal", + "t0"."s_comment" + FROM "supplier" AS "t0" + ) AS "t9" + INNER JOIN ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + ) AS "t10" + ON "t9"."s_suppkey" = "t10"."l_suppkey" + INNER JOIN ( SELECT - 1 AS anon_2 - FROM hive.ibis_sf1.lineitem AS t2 + "t2"."o_orderkey", + "t2"."o_custkey", + "t2"."o_orderstatus", + CAST("t2"."o_totalprice" AS DECIMAL(15, 2)) AS "o_totalprice", + "t2"."o_orderdate", + "t2"."o_orderpriority", + "t2"."o_clerk", + "t2"."o_shippriority", + "t2"."o_comment" + FROM "orders" AS "t2" + ) AS "t13" + ON "t13"."o_orderkey" = "t10"."l_orderkey" + INNER JOIN ( + SELECT + "t3"."n_nationkey", + "t3"."n_name", + "t3"."n_regionkey", + "t3"."n_comment" + FROM "nation" AS "t3" + ) AS "t8" + ON 
"t9"."s_nationkey" = "t8"."n_nationkey" + ) AS "t17" + WHERE + "t17"."o_orderstatus" = 'F' + AND "t17"."l_receiptdate" > "t17"."l_commitdate" + AND "t17"."n_name" = 'SAUDI ARABIA' + AND EXISTS( + SELECT + 1 AS "1" + FROM ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + ) AS "t11" WHERE - t2.l_orderkey = t0.l1_orderkey - AND t2.l_suppkey <> t0.l1_suppkey - AND t2.l_receiptdate > t2.l_commitdate + ( + "t11"."l_orderkey" = "t17"."l1_orderkey" + ) + AND ( + "t11"."l_suppkey" <> "t17"."l1_suppkey" + ) + ) + AND NOT ( + EXISTS( + SELECT + 1 AS "1" + FROM ( + SELECT + "t1"."l_orderkey", + "t1"."l_partkey", + "t1"."l_suppkey", + "t1"."l_linenumber", + CAST("t1"."l_quantity" AS DECIMAL(15, 2)) AS "l_quantity", + CAST("t1"."l_extendedprice" AS DECIMAL(15, 2)) AS "l_extendedprice", + CAST("t1"."l_discount" AS DECIMAL(15, 2)) AS "l_discount", + CAST("t1"."l_tax" AS DECIMAL(15, 2)) AS "l_tax", + "t1"."l_returnflag", + "t1"."l_linestatus", + "t1"."l_shipdate", + "t1"."l_commitdate", + "t1"."l_receiptdate", + "t1"."l_shipinstruct", + "t1"."l_shipmode", + "t1"."l_comment" + FROM "lineitem" AS "t1" + ) AS "t12" + WHERE + ( + ( + "t12"."l_orderkey" = "t17"."l1_orderkey" + ) + AND ( + "t12"."l_suppkey" <> "t17"."l1_suppkey" + ) + ) + AND ( + "t12"."l_receiptdate" > "t12"."l_commitdate" + ) + ) ) - ) + ) AS "t20" GROUP BY 1 -) AS t1 +) AS "t21" ORDER BY - t1.numwait DESC, - t1.s_name ASC + "t21"."numwait" DESC, + "t21"."s_name" ASC LIMIT 100 \ No newline at end of file diff --git a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql index c76bc9fc5af0..10f4cf53dd9c 100644 --- a/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql +++ b/ibis/backends/tests/tpch/snapshots/test_h22/test_tpc_h22/trino/h22.sql @@ -1,62 +1,61 @@ -WITH t0 AS ( +SELECT + "t7"."cntrycode", + "t7"."numcust", + "t7"."totacctbal" +FROM ( SELECT - CASE - WHEN ( - 0 + 1 >= 1 - ) - THEN SUBSTR(t2.c_phone, 0 + 1, 2) - ELSE SUBSTR(t2.c_phone, 0 + 1 + LENGTH(t2.c_phone), 2) - END AS cntrycode, - t2.c_acctbal AS c_acctbal - FROM hive.ibis_sf1.customer AS t2 - WHERE - CASE - WHEN ( - 0 + 1 >= 1 - ) - THEN SUBSTR(t2.c_phone, 0 + 1, 2) - ELSE SUBSTR(t2.c_phone, 0 + 1 + LENGTH(t2.c_phone), 2) - END IN ('13', '31', '23', '29', '30', '18', '17') - AND t2.c_acctbal > ( + "t6"."cntrycode", + COUNT(*) AS "numcust", + SUM("t6"."c_acctbal") AS "totacctbal" + FROM ( + SELECT + IF(0 >= 0, SUBSTRING("t2"."c_phone", 0 + 1, 2), SUBSTRING("t2"."c_phone", 0, 2)) AS "cntrycode", + "t2"."c_acctbal" + FROM ( SELECT - anon_1.avg_bal - FROM ( - SELECT - AVG(t2.c_acctbal) AS avg_bal - FROM hive.ibis_sf1.customer AS t2 - WHERE - t2.c_acctbal > 0.0 - AND CASE - WHEN ( - 0 + 1 >= 1 - ) - THEN SUBSTR(t2.c_phone, 0 + 1, 2) - ELSE SUBSTR(t2.c_phone, 0 + 1 + LENGTH(t2.c_phone), 2) - END IN ('13', '31', '23', '29', '30', '18', '17') - ) AS anon_1 - ) - AND NOT ( - EXISTS( + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + 
CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + ) AS "t2" + WHERE + IF(0 >= 0, SUBSTRING("t2"."c_phone", 0 + 1, 2), SUBSTRING("t2"."c_phone", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + AND "t2"."c_acctbal" > ( SELECT - 1 AS anon_2 - FROM hive.ibis_sf1.orders AS t3 - WHERE - t3.o_custkey = t2.c_custkey + AVG("t3"."c_acctbal") AS "Mean(c_acctbal)" + FROM ( + SELECT + "t0"."c_custkey", + "t0"."c_name", + "t0"."c_address", + "t0"."c_nationkey", + "t0"."c_phone", + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) AS "c_acctbal", + "t0"."c_mktsegment", + "t0"."c_comment" + FROM "customer" AS "t0" + WHERE + CAST("t0"."c_acctbal" AS DECIMAL(15, 2)) > CAST(0.0 AS DOUBLE) + AND IF(0 >= 0, SUBSTRING("t0"."c_phone", 0 + 1, 2), SUBSTRING("t0"."c_phone", 0, 2)) IN ('13', '31', '23', '29', '30', '18', '17') + ) AS "t3" ) - ) -) -SELECT - t1.cntrycode, - t1.numcust, - t1.totacctbal -FROM ( - SELECT - t0.cntrycode AS cntrycode, - COUNT(*) AS numcust, - SUM(t0.c_acctbal) AS totacctbal - FROM t0 + AND NOT ( + EXISTS( + SELECT + 1 AS "1" + FROM "orders" AS "t1" + WHERE + "t1"."o_custkey" = "t2"."c_custkey" + ) + ) + ) AS "t6" GROUP BY 1 -) AS t1 +) AS "t7" ORDER BY - t1.cntrycode ASC \ No newline at end of file + "t7"."cntrycode" ASC \ No newline at end of file diff --git a/ibis/backends/tests/tpch/test_h15.py b/ibis/backends/tests/tpch/test_h15.py index 0d3fedd9a1b0..5d288e389d2a 100644 --- a/ibis/backends/tests/tpch/test_h15.py +++ b/ibis/backends/tests/tpch/test_h15.py @@ -1,11 +1,19 @@ from __future__ import annotations +import pytest + import ibis from .conftest import add_date, tpch_test @tpch_test +@pytest.mark.notyet( + ["trino"], + reason="unreliable due to floating point differences in repeated evaluations of identical subqueries", + raises=AssertionError, + strict=False, +) def test_tpc_h15(lineitem, supplier): """Top Supplier Query (Q15)""" @@ -26,6 +34,5 @@ def test_tpc_h15(lineitem, supplier): q = supplier.join(qrev, supplier.s_suppkey == qrev.l_suppkey) q = q.filter([q.total_revenue == qrev.total_revenue.max()]) - q = q.order_by([q.s_suppkey]) q = q[q.s_suppkey, q.s_name, q.s_address, q.s_phone, q.total_revenue] - return q + return q.order_by([q.s_suppkey]) diff --git a/ibis/backends/trino/__init__.py b/ibis/backends/trino/__init__.py index 833f6a8ecb9c..7122241c56c1 100644 --- a/ibis/backends/trino/__init__.py +++ b/ibis/backends/trino/__init__.py @@ -2,60 +2,175 @@ from __future__ import annotations -import collections +import atexit import contextlib -import warnings from functools import cached_property +from operator import itemgetter from typing import TYPE_CHECKING, Any -import pandas as pd -import sqlalchemy as sa import sqlglot as sg -import toolz -from trino.sqlalchemy.datatype import ROW as _ROW -from trino.sqlalchemy.dialect import TrinoDialect +import sqlglot.expressions as sge +import trino import ibis import ibis.common.exceptions as com import ibis.expr.datatypes as dt +import ibis.expr.schema as sch import ibis.expr.types as ir from ibis import util from ibis.backends.base import CanListDatabases -from ibis.backends.base.sql.alchemy import ( - AlchemyCanCreateSchema, - AlchemyCrossSchemaBackend, -) -from ibis.backends.base.sql.alchemy.datatypes import ArrayType -from ibis.backends.trino.compiler import TrinoSQLCompiler -from ibis.backends.trino.datatypes import INTERVAL, ROW, TrinoType +from ibis.backends.base.sqlglot import SQLGlotBackend +from ibis.backends.base.sqlglot.compiler import C +from 
ibis.backends.trino.compiler import TrinoCompiler if TYPE_CHECKING: from collections.abc import Iterator, Mapping + import pandas as pd import pyarrow as pa - import ibis.expr.schema as sch + import ibis.expr.operations as ops -class Backend(AlchemyCrossSchemaBackend, AlchemyCanCreateSchema, CanListDatabases): +class Backend(SQLGlotBackend, CanListDatabases): name = "trino" - compiler = TrinoSQLCompiler + compiler = TrinoCompiler() supports_create_or_replace = False supports_temporary_tables = False + def raw_sql(self, query: str | sg.Expression) -> Any: + """Execute a raw SQL query.""" + with contextlib.suppress(AttributeError): + query = query.sql(dialect=self.name, pretty=True) + + con = self.con + cur = con.cursor() + try: + cur.execute(query) + except Exception: + if con.transaction is not None: + con.rollback() + if cur._query: + cur.close() + raise + else: + if con.transaction is not None: + con.commit() + return cur + + @contextlib.contextmanager + def begin(self): + con = self.con + cur = con.cursor() + try: + yield cur + except Exception: + if con.transaction is not None: + con.rollback() + raise + else: + if con.transaction is not None: + con.commit() + finally: + if cur._query: + cur.close() + + @contextlib.contextmanager + def _safe_raw_sql( + self, query: str | sge.Expression + ) -> Iterator[trino.dbapi.Cursor]: + """Execute a raw SQL query, yielding the cursor. + + Parameters + ---------- + query + The query to execute. + + Yields + ------ + trino.dbapi.Cursor + The cursor of the executed query. + """ + cur = self.raw_sql(query) + try: + yield cur + finally: + if cur._query: + cur.close() + + def get_schema( + self, table_name: str, schema: str | None = None, database: str | None = None + ) -> sch.Schema: + """Compute the schema of a `table`. + + Parameters + ---------- + table_name + May **not** be fully qualified. Use `database` if you want to + qualify the identifier. 
+ schema + Schema name + database + Database name + + Returns + ------- + sch.Schema + Ibis schema + """ + conditions = [sg.column("table_name").eq(sge.convert(table_name))] + + if schema is not None: + conditions.append(sg.column("table_schema").eq(sge.convert(schema))) + + query = ( + sg.select( + "column_name", + "data_type", + sg.column("is_nullable").eq(sge.convert("YES")).as_("nullable"), + ) + .from_(sg.table("columns", db="information_schema", catalog=database)) + .where(sg.and_(*conditions)) + .order_by("ordinal_position") + ) + + with self._safe_raw_sql(query) as cur: + meta = cur.fetchall() + + if not meta: + fqn = sg.table(table_name, db=schema, catalog=database).sql(self.name) + raise com.IbisError(f"Table not found: {fqn}") + + return sch.Schema( + { + name: self.compiler.type_mapper.from_string(typ, nullable=nullable) + for name, typ, nullable in meta + } + ) + @cached_property def version(self) -> str: - return self._scalar_query(sa.select(sa.func.version())) + with self._safe_raw_sql(sg.select(self.compiler.f.version())) as cur: + [(version,)] = cur.fetchall() + return version @property def current_database(self) -> str: - return self._scalar_query(sa.select(sa.literal_column("current_catalog"))) + with self._safe_raw_sql(sg.select(C.current_catalog)) as cur: + [(database,)] = cur.fetchall() + return database + + @property + def current_schema(self) -> str: + with self._safe_raw_sql(sg.select(C.current_schema)) as cur: + [(schema,)] = cur.fetchall() + return schema def list_databases(self, like: str | None = None) -> list[str]: query = "SHOW CATALOGS" - with self.begin() as con: - catalogs = list(con.exec_driver_sql(query).scalars()) - return self._filter_with_like(catalogs, like=like) + with self._safe_raw_sql(query) as cur: + catalogs = cur.fetchall() + return self._filter_with_like(list(map(itemgetter(0), catalogs)), like=like) def list_schemas( self, like: str | None = None, database: str | None = None @@ -63,15 +178,14 @@ def list_schemas( query = "SHOW SCHEMAS" if database is not None: - query += f" IN {self._quote(database)}" - - with self.begin() as con: - schemata = list(con.exec_driver_sql(query).scalars()) - return self._filter_with_like(schemata, like) + database = sg.to_identifier(database, quoted=self.compiler.quoted).sql( + self.name + ) + query += f" IN {database}" - @property - def current_schema(self) -> str: - return self._scalar_query(sa.select(sa.literal_column("current_schema"))) + with self._safe_raw_sql(query) as cur: + schemata = cur.fetchall() + return self._filter_with_like(list(map(itemgetter(0), schemata)), like) def list_tables( self, @@ -111,10 +225,10 @@ def list_tables( if database is not None: query += f" IN {database}" - with self.begin() as con: - tables = list(con.exec_driver_sql(query).scalars()) + with self._safe_raw_sql(query) as cur: + tables = cur.fetchall() - return self._filter_with_like(tables, like=like) + return self._filter_with_like(list(map(itemgetter(0), tables)), like=like) def do_connect( self, @@ -125,6 +239,7 @@ def do_connect( database: str | None = None, schema: str | None = None, source: str | None = None, + timezone: str = "UTC", **connect_args, ) -> None: """Connect to Trino. 
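
For reference, a minimal usage sketch of the connection API after this port: `do_connect` now builds a `trino.dbapi` connection directly instead of a SQLAlchemy engine. The connection values below are illustrative placeholders (any reachable Trino server works), and `nation` is simply one of the TPC-H tables exercised elsewhere in this patch:

    import ibis

    # hypothetical connection parameters, not taken from this patch
    con = ibis.trino.connect(
        user="user",
        host="localhost",
        port=8080,
        database="hive",    # Trino catalog
        schema="default",   # Trino schema within the catalog
        timezone="UTC",     # session time zone, new in this patch
    )
    print(con.list_tables())         # runs a SHOW TABLES query
    print(con.get_schema("nation"))  # reads information_schema.columns
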
@@ -145,6 +260,8 @@ def do_connect(
             Schema to use on the Trino server
         source
             Application name passed to Trino
+        timezone
+            Timezone to use for the connection
         connect_args
-            Additional keyword arguments passed directly to SQLAlchemy's
-            `create_engine`
+            Additional keyword arguments passed directly to
+            `trino.dbapi.connect`
@@ -167,97 +284,83 @@ def do_connect(
         >>> con = ibis.trino.connect(database=catalog, schema=schema)
         >>> con = ibis.trino.connect(database=catalog, schema=schema, source="my-app")
         """
-        database = "/".join(filter(None, (database, schema)))
-        url = sa.engine.URL.create(
-            drivername="trino",
-            username=user,
-            password=password,
+        self.con = trino.dbapi.connect(
+            user=user,
+            auth=password,
             host=host,
             port=port,
-            database=database,
-            query=dict(source="ibis" if source is None else source),
+            catalog=database,
+            schema=schema,
+            source=source or "ibis",
+            timezone=timezone,
+            **connect_args,
         )
-        connect_args.setdefault("timezone", "UTC")
-        with warnings.catch_warnings():
-            warnings.filterwarnings(
-                "ignore",
-                message=r"The dbapi\(\) classmethod on dialect classes has been renamed",
-                category=sa.exc.SADeprecationWarning,
-            )
-            super().do_connect(
-                sa.create_engine(
-                    url, connect_args=connect_args, poolclass=sa.pool.StaticPool
-                )
-            )
-
-    @staticmethod
-    def _new_sa_metadata():
-        meta = sa.MetaData()
-
-        @sa.event.listens_for(meta, "column_reflect")
-        def column_reflect(inspector, table, column_info):
-            if isinstance(typ := column_info["type"], _ROW):
-                column_info["type"] = ROW(typ.attr_types)
-            elif isinstance(typ, sa.ARRAY):
-                column_info["type"] = toolz.nth(
-                    typ.dimensions or 1, toolz.iterate(ArrayType, typ.item_type)
-                )
-            elif isinstance(typ, sa.Interval):
-                column_info["type"] = INTERVAL(
-                    native=typ.native,
-                    day_precision=typ.day_precision,
-                    second_precision=typ.second_precision,
-                )
-
-        return meta
+        self._temp_views = set()
 
     @contextlib.contextmanager
     def _prepare_metadata(self, query: str) -> Iterator[dict[str, str]]:
-        name = util.gen_name("trino_metadata")
-        with self.begin() as con:
-            con.exec_driver_sql(f"PREPARE {name} FROM {query}")
+        name = util.gen_name(f"{self.name}_metadata")
+        with self.begin() as cur:
+            cur.execute(f"PREPARE {name} FROM {query}")
             try:
-                yield con.exec_driver_sql(f"DESCRIBE OUTPUT {name}").mappings()
+                cur.execute(f"DESCRIBE OUTPUT {name}")
+                yield cur.fetchall()
             finally:
-                con.exec_driver_sql(f"DEALLOCATE PREPARE {name}")
+                cur.execute(f"DEALLOCATE PREPARE {name}")
 
     def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]:
-        with self._prepare_metadata(query) as mappings:
+        with self._prepare_metadata(query) as info:
             yield from (
                 # trino types appear to be always nullable
-                (name, TrinoType.from_string(trino_type).copy(nullable=True))
-                for name, trino_type in toolz.pluck(["Column Name", "Type"], mappings)
+                (
+                    name,
+                    self.compiler.type_mapper.from_string(trino_type).copy(
+                        nullable=True
+                    ),
+                )
+                for name, _, _, _, trino_type, *_ in info
            )
 
     def _execute_view_creation(self, name, definition):
-        from sqlalchemy_views import CreateView
-
         # NB: trino doesn't support temporary views so we use the less
         # desirable method of cleaning up when the Python process exits using
         # an atexit hook
         #
         # the method that defines the atexit hook is defined in the parent
         # class
-        view = CreateView(sa.table(name), definition, or_replace=True)
+        view = sg.Create(
+            kind="VIEW",
+            this=sg.table(name, quoted=self.compiler.quoted),
+            expression=definition,
+            replace=True,
+        )
 
-        with self.begin() as con:
-            con.execute(view)
+        with self._safe_raw_sql(view):
+            pass
 
     def create_schema(
         self, name: str, database: str 
| None = None, force: bool = False ) -> None: - name = ".".join(map(self._quote, filter(None, [database, name]))) - if_not_exists = "IF NOT EXISTS " * force - with self.begin() as con: - con.exec_driver_sql(f"CREATE SCHEMA {if_not_exists}{name}") + with self._safe_raw_sql( + sge.Create( + this=sg.table(name, catalog=database, quoted=self.compiler.quoted), + kind="SCHEMA", + exists=force, + ) + ): + pass def drop_schema( self, name: str, database: str | None = None, force: bool = False ) -> None: - name = ".".join(map(self._quote, filter(None, [database, name]))) - if_exists = "IF EXISTS " * force - with self.begin() as con: - con.exec_driver_sql(f"DROP SCHEMA {if_exists}{name}") + with self._safe_raw_sql( + sge.Drop( + this=sg.table(name, catalog=database, quoted=self.compiler.quoted), + kind="SCHEMA", + exists=force, + ) + ): + pass def create_table( self, @@ -301,49 +404,46 @@ def create_table( if temp: raise NotImplementedError( - "Temporary tables in the Trino backend are not yet supported" + "Temporary tables are not supported in the Trino backend" ) - orig_table_ref = name + quoted = self.compiler.quoted + orig_table_ref = sg.to_identifier(name, quoted=quoted) if overwrite: - name = util.gen_name("trino_overwrite") - - create_stmt = "CREATE TABLE" + name = util.gen_name(f"{self.name}_overwrite") - table_ref = self._quote(name) - - create_stmt += f" {table_ref}" + table_ref = sg.table(name, catalog=database, quoted=quoted) if schema is not None and obj is None: - schema_str = ", ".join( - ( - f"{self._quote(name)} {TrinoType.to_string(typ)}" - + " NOT NULL" * (not typ.nullable) + column_defs = [ + sg.exp.ColumnDef( + this=sg.to_identifier(name, quoted=self.compiler.quoted), + kind=self.compiler.type_mapper.from_ibis(typ), + # TODO(cpcloud): not null constraints are unreliable in + # trino, so we ignore them + # https://github.com/trinodb/trino/issues/2923 + constraints=None, ) for name, typ in schema.items() - ) - create_stmt += f" ({schema_str})" - - if comment is not None: - create_stmt += f" COMMENT {comment!r}" - - if properties: - - def literal_compile(v): - if isinstance(v, collections.abc.Mapping): - return f"MAP(ARRAY{list(v.keys())!r}, ARRAY{list(v.values())!r})" - elif util.is_iterable(v): - return f"ARRAY{list(v)!r}" - else: - return repr(v) + ] + target = sge.Schema(this=table_ref, expressions=column_defs) + else: + target = table_ref - pairs = ", ".join( - f"{k} = {literal_compile(v)}" for k, v in properties.items() + property_list = [ + sge.Property( + this=sg.to_identifier(k), + value=self.compiler.translate(ibis.literal(v).op(), params={}), ) - create_stmt += f" WITH ({pairs})" + for k, v in (properties or {}).items() + ] + + if comment: + property_list.append(sge.SchemaCommentProperty(this=sge.convert(comment))) if obj is not None: + import pandas as pd import pyarrow as pa import pyarrow_hotfix # noqa: F401 @@ -354,53 +454,119 @@ def literal_compile(v): self._run_pre_execute_hooks(table) - compiled_table = self.compile(table) - # cast here because trino doesn't allow specifying a schema in # CTAS, e.g., `CREATE TABLE (schema) AS SELECT` - subquery = compiled_table.subquery() - columns = subquery.columns - select = sa.select( + select = sg.select( *( - sa.cast(columns[name], TrinoType.from_ibis(typ)) + self.compiler.cast(sg.column(name, quoted=quoted), typ).as_( + name, quoted=quoted + ) for name, typ in (schema or table.schema()).items() ) - ) - - compiled = select.compile( - dialect=TrinoDialect(), compile_kwargs=dict(literal_binds=True) - ) - - create_stmt += f" AS 
{compiled}"
-
-        with self.begin() as con:
-            con.exec_driver_sql(create_stmt)
+            ).from_(self._to_sqlglot(table).subquery())
+        else:
+            select = None
+
+        create_stmt = sge.Create(
+            kind="TABLE",
+            this=target,
+            expression=select,
+            properties=(
+                sge.Properties(expressions=property_list) if property_list else None
+            ),
+        )
 
+        with self._safe_raw_sql(create_stmt) as cur:
             if overwrite:
                 # drop the original table
-                con.exec_driver_sql(
-                    f"DROP TABLE IF EXISTS {self._quote(orig_table_ref)}"
+                cur.execute(
+                    sge.Drop(kind="TABLE", this=orig_table_ref, exists=True).sql(
+                        self.name
+                    )
                 )
 
                 # rename the new table to the original table name
-                con.exec_driver_sql(
-                    f"ALTER TABLE IF EXISTS {table_ref} RENAME TO {self._quote(orig_table_ref)}"
+                cur.execute(
+                    sge.AlterTable(
+                        this=table_ref,
+                        exists=True,
+                        actions=[sge.RenameTable(this=orig_table_ref, exists=True)],
+                    ).sql(self.name)
                 )
 
-        return self.table(orig_table_ref)
+        return self.table(orig_table_ref.name)
 
-    def _table_from_schema(
-        self,
-        name: str,
-        schema: sch.Schema,
-        temp: bool = False,
-        database: str | None = None,
-        **kwargs: Any,
-    ) -> sa.Table:
-        return super()._table_from_schema(
-            name,
-            schema,
-            temp=temp,
-            trino_catalog=database or self.current_database,
-            **kwargs,
+    def _get_temp_view_definition(self, name: str, definition: str) -> str:
+        return sge.Create(
+            this=sg.to_identifier(name, quoted=self.compiler.quoted),
+            kind="VIEW",
+            expression=definition,
+            replace=True,
        )
+
+    def _register_temp_view_cleanup(self, name: str) -> None:
+        def drop(self, name: str, query: str):
+            self.raw_sql(query)
+            self._temp_views.discard(name)
+
+        query = sge.Drop(this=sg.table(name), kind="VIEW", exists=True)
+        atexit.register(drop, self, name=name, query=query)
+
+    def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame:
+        import pandas as pd
+
+        from ibis.backends.trino.converter import TrinoPandasData
+
+        try:
+            df = pd.DataFrame.from_records(
+                cursor.fetchall(), columns=schema.names, coerce_float=True
+            )
+        except Exception:
+            # clean up the cursor if we fail to create the DataFrame
+            #
+            # in the sqlite case failing to close the cursor results in
+            # artificially locked tables
+            cursor.close()
+            raise
+        df = TrinoPandasData.convert_table(df, schema)
+        return df
+
+    def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
+        schema = op.schema
+        if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
+            raise com.IbisTypeError(
+                "Trino cannot yet reliably handle `null` typed columns; "
+                f"got null typed columns: {null_columns}"
+            )
+
+        # only register if we haven't already done so
+        if (name := op.name) not in self.list_tables():
+            quoted = self.compiler.quoted
+            column_defs = [
+                sg.exp.ColumnDef(
+                    this=sg.to_identifier(colname, quoted=quoted),
+                    kind=self.compiler.type_mapper.from_ibis(typ),
+                    # we don't support `NOT NULL` constraints in trino
+                    # because each trino connector differs in whether it
+                    # supports nullability constraints, and whether the
+                    # connector supports it isn't visible to ibis via a
+                    # metadata query
+                )
+                for colname, typ in schema.items()
+            ]
+
+            create_stmt = sg.exp.Create(
+                kind="TABLE",
+                this=sg.exp.Schema(
+                    this=sg.to_identifier(name, quoted=quoted), expressions=column_defs
+                ),
+            ).sql(self.name, pretty=True)
+
+            data = op.data.to_frame().itertuples(index=False)
+            specs = ", ".join("?" 
* len(schema)) + table = sg.table(name, quoted=quoted).sql(self.name) + insert_stmt = f"INSERT INTO {table} VALUES ({specs})" + with self.begin() as cur: + cur.execute(create_stmt) + for row in data: + cur.execute(insert_stmt, row) diff --git a/ibis/backends/trino/compiler.py b/ibis/backends/trino/compiler.py index e8d199daead5..3ea36c3b81f3 100644 --- a/ibis/backends/trino/compiler.py +++ b/ibis/backends/trino/compiler.py @@ -1,86 +1,521 @@ from __future__ import annotations -import sqlalchemy as sa +import math +from functools import partial, reduce, singledispatchmethod +import sqlglot as sg +import sqlglot.expressions as sge +import toolz +from sqlglot.dialects import Trino +from sqlglot.dialects.dialect import rename_func + +import ibis.common.exceptions as com +import ibis.expr.datatypes as dt import ibis.expr.operations as ops -from ibis.backends.base.sql.alchemy import AlchemyCompiler, AlchemyExprTranslator -from ibis.backends.base.sql.alchemy.query_builder import _AlchemyTableSetFormatter -from ibis.backends.trino.datatypes import TrinoType -from ibis.backends.trino.registry import operation_registry -from ibis.common.exceptions import UnsupportedOperationError - - -class TrinoSQLExprTranslator(AlchemyExprTranslator): - _registry = operation_registry.copy() - _rewrites = AlchemyExprTranslator._rewrites.copy() - _has_reduction_filter_syntax = True - _supports_tuple_syntax = True - _integer_to_timestamp = staticmethod(sa.func.from_unixtime) - - _forbids_frame_clause = ( - *AlchemyExprTranslator._forbids_frame_clause, - ops.Lead, - ops.Lag, - ) - _require_order_by = ( - *AlchemyExprTranslator._require_order_by, - ops.Lag, - ops.Lead, - ) - _dialect_name = "trino" - supports_unnest_in_select = False +from ibis.backends.base.sqlglot.compiler import FALSE, NULL, SQLGlotCompiler, paren +from ibis.backends.base.sqlglot.datatypes import TrinoType +from ibis.backends.base.sqlglot.rewrites import ( + exclude_unsupported_window_frame_from_ops, + rewrite_first_to_first_value, + rewrite_last_to_last_value, +) +from ibis.expr.rewrites import rewrite_sample + + +# TODO(cpcloud): remove this hack once +# https://github.com/tobymao/sqlglot/issues/2735 is resolved +def make_cross_joins_explicit(node): + if not (node.kind or node.side): + node.args["kind"] = "CROSS" + return node + + +Trino.Generator.TRANSFORMS |= { + sge.BitwiseLeftShift: rename_func("bitwise_left_shift"), + sge.BitwiseRightShift: rename_func("bitwise_right_shift"), + sge.Join: sg.transforms.preprocess([make_cross_joins_explicit]), +} + + +class TrinoCompiler(SQLGlotCompiler): + __slots__ = () + + dialect = "trino" type_mapper = TrinoType + rewrites = ( + rewrite_sample, + rewrite_first_to_first_value, + rewrite_last_to_last_value, + exclude_unsupported_window_frame_from_ops, + *SQLGlotCompiler.rewrites, + ) + quoted = True + NAN = sg.func("nan") + POS_INF = sg.func("infinity") + NEG_INF = -POS_INF -rewrites = TrinoSQLExprTranslator.rewrites + def _aggregate(self, funcname: str, *args, where): + expr = self.f[funcname](*args) + if where is not None: + return sge.Filter(this=expr, expression=sge.Where(this=where)) + return expr + @staticmethod + def _minimize_spec(start, end, spec): + if ( + start is None + and isinstance(getattr(end, "value", None), ops.Literal) + and end.value.value == 0 + and end.following + ): + return None + return spec -@rewrites(ops.Any) -@rewrites(ops.All) -@rewrites(ops.StringContains) -def _no_op(expr): - return expr + @singledispatchmethod + def visit_node(self, op, **kw): + return super().visit_node(op, 
**kw) + @visit_node.register(ops.Correlation) + def visit_Correlation(self, op, *, left, right, how, where): + if how == "sample": + raise com.UnsupportedOperationError( + "Trino does not support `sample` correlation" + ) + if (left_type := op.left.dtype).is_boolean(): + left = self.cast(left, dt.Int32(nullable=left_type.nullable)) -@rewrites(ops.StringContains) -def _rewrite_string_contains(op): - return ops.GreaterEqual(ops.StringFind(op.haystack, op.needle), 0) + if (right_type := op.right.dtype).is_boolean(): + right = self.cast(right, dt.Int32(nullable=right_type.nullable)) + return self.agg.corr(left, right, where=where) -class TrinoTableSetFormatter(_AlchemyTableSetFormatter): - def _format_sample(self, op, table): - if op.seed is not None: - raise UnsupportedOperationError( - "`Table.sample` with a random seed is unsupported" + @visit_node.register(ops.Arbitrary) + def visit_Arbitrary(self, op, *, arg, how, where): + if how != "first": + raise com.UnsupportedOperationError( + 'Trino only supports how="first" for `arbitrary` reduction' ) - method = sa.func.bernoulli if op.method == "row" else sa.func.system - return table.tablesample( - sampling=method(sa.literal_column(f"{op.fraction * 100}")) + return self.agg.arbitrary(arg, where=where) + + @visit_node.register(ops.BitXor) + def visit_BitXor(self, op, *, arg, where): + a, b = map(sg.to_identifier, "ab") + input_fn = combine_fn = sge.Lambda( + this=sge.BitwiseXor(this=a, expression=b), expressions=[a, b] ) + return self.agg.reduce_agg(arg, 0, input_fn, combine_fn, where=where) + + @visit_node.register(ops.ArrayRepeat) + def visit_ArrayRepeat(self, op, *, arg, times): + return self.f.flatten(self.f.repeat(arg, times)) + + @visit_node.register(ops.ArraySlice) + def visit_ArraySlice(self, op, *, arg, start, stop): + def _neg_idx_to_pos(n, idx): + return self.if_(idx < 0, n + self.f.greatest(idx, -n), idx) + + arg_length = self.f.cardinality(arg) + + if start is None: + start = 0 + else: + start = self.f.least(arg_length, _neg_idx_to_pos(arg_length, start)) - def _format_in_memory_table(self, op, translator): - if not op.data: - return sa.select( - *( - translator.translate(ops.Literal(None, dtype=type_)).label(name) - for name, type_ in op.schema.items() + if stop is None: + stop = arg_length + else: + stop = _neg_idx_to_pos(arg_length, stop) + + return self.f.slice(arg, start + 1, stop - start) + + @visit_node.register(ops.ArrayMap) + def visit_ArrayMap(self, op, *, arg, param, body): + return self.f.transform(arg, sge.Lambda(this=body, expressions=[param])) + + @visit_node.register(ops.ArrayFilter) + def visit_ArrayFilter(self, op, *, arg, param, body): + return self.f.filter(arg, sge.Lambda(this=body, expressions=[param])) + + @visit_node.register(ops.ArrayContains) + def visit_ArrayContains(self, op, *, arg, other): + return self.if_( + arg.is_(sg.not_(NULL)), + self.f.coalesce(self.f.contains(arg, other), FALSE), + NULL, + ) + + @visit_node.register(ops.JSONGetItem) + def visit_JSONGetItem(self, op, *, arg, index): + fmt = "%d" if op.index.dtype.is_integer() else '"%s"' + return self.f.json_extract(arg, self.f.format(f"$[{fmt}]", index)) + + @visit_node.register(ops.DayOfWeekIndex) + def visit_DayOfWeekIndex(self, op, *, arg): + return self.cast(paren(self.f.day_of_week(arg) + 6) % 7, op.dtype) + + @visit_node.register(ops.DayOfWeekName) + def visit_DayOfWeekName(self, op, *, arg): + return self.f.date_format(arg, "%W") + + @visit_node.register(ops.StrRight) + def visit_StrRight(self, op, *, arg, nchars): + return 
self.f.substr(arg, -nchars) + + @visit_node.register(ops.EndsWith) + def visit_EndsWith(self, op, *, arg, end): + return self.f.substr(arg, -self.f.length(end)).eq(end) + + @visit_node.register(ops.Repeat) + def visit_Repeat(self, op, *, arg, times): + return self.f.array_join(self.f.repeat(arg, times), "") + + @visit_node.register(ops.DateTruncate) + @visit_node.register(ops.TimestampTruncate) + def visit_DateTimestampTruncate(self, op, *, arg, unit): + _truncate_precisions = { + # ms unit is not yet officially documented but it works + "ms": "millisecond", + "s": "second", + "m": "minute", + "h": "hour", + "D": "day", + "W": "week", + "M": "month", + "Q": "quarter", + "Y": "year", + } + + if (precision := _truncate_precisions.get(unit.short)) is None: + raise com.UnsupportedOperationError( + f"Unsupported truncate unit {op.unit!r}" + ) + return self.f.date_trunc(precision, arg) + + @visit_node.register(ops.DateFromYMD) + def visit_DateFromYMD(self, op, *, year, month, day): + return self.f.from_iso8601_date( + self.f.format("%04d-%02d-%02d", year, month, day) + ) + + @visit_node.register(ops.TimeFromHMS) + def visit_TimeFromHMS(self, op, *, hours, minutes, seconds): + return self.cast( + self.f.format("%02d:%02d:%02d", hours, minutes, seconds), dt.time + ) + + @visit_node.register(ops.TimestampFromYMDHMS) + def visit_TimestampFromYMDHMS( + self, op, *, year, month, day, hours, minutes, seconds + ): + return self.cast( + self.f.from_iso8601_timestamp( + self.f.format( + "%04d-%02d-%02dT%02d:%02d:%02d", + year, + month, + day, + hours, + minutes, + seconds, ) - ).limit(0) + ), + dt.timestamp, + ) + + @visit_node.register(ops.TimestampFromUNIX) + def visit_TimestampFromUNIX(self, op, *, arg, unit): + short = unit.short + if short == "ms": + res = self.f.from_unixtime(self.f.floor(arg / 1_000)) + elif short == "s": + res = self.f.from_unixtime(arg) + elif short == "us": + res = self.f.from_unixtime_nanos((arg - arg % 1_000_000) * 1_000) + elif short == "ns": + res = self.f.from_unixtime_nanos(arg - arg % 1_000_000_000) + else: + raise com.UnsupportedOperationError(f"{unit!r} unit is not supported") + return self.cast(res, op.dtype) + + @visit_node.register(ops.StructColumn) + def visit_StructColumn(self, op, *, names, values): + return self.cast(sge.Struct(expressions=list(values)), op.dtype) + + def visit_NonNullLiteral(self, op, *, value, dtype): + if dtype.is_floating(): + if math.isfinite(value): + return self.cast(value, dtype) + return super().visit_NonNullLiteral(op, value=value, dtype=dtype) + elif dtype.is_struct(): + items = [ + self.visit_Literal(ops.Literal(v, fdtype), value=v, dtype=fdtype) + for fdtype, v in zip(dtype.types, value.values()) + ] + return self.cast(sge.Struct(expressions=items), dtype) + elif dtype.is_timestamp(): + return self.cast(self.f.from_iso8601_timestamp(value.isoformat()), dtype) + elif dtype.is_date(): + return self.f.from_iso8601_date(value.isoformat()) + elif dtype.is_time(): + return self.cast(value.isoformat(), dtype) + elif dtype.is_interval(): + return sge.Interval( + this=sge.convert(str(value)), unit=self.v[dtype.resolution.upper()] + ) + elif dtype.is_binary(): + return self.f.from_hex(value.hex()) + else: + return None + + @visit_node.register(ops.Log) + def visit_Log(self, op, *, arg, base): + return self.f.log(base, arg, dialect=self.dialect) + + @visit_node.register(ops.MapGet) + def visit_MapGet(self, op, *, arg, key, default): + return self.f.coalesce(self.f.element_at(arg, key), default) - op_schema = list(op.schema.items()) - rows = [ - 
tuple(
-                translator.translate(ops.Literal(col, dtype=type_)).label(name)
-                for col, (name, type_) in zip(row, op_schema)
+    @visit_node.register(ops.MapContains)
+    def visit_MapContains(self, op, *, arg, key):
+        return self.f.contains(self.f.map_keys(arg), key)
+
+    @visit_node.register(ops.ExtractFile)
+    def visit_ExtractFile(self, op, *, arg):
+        return self.f.concat_ws(
+            "?",
+            self.f.nullif(self.f.url_extract_path(arg), ""),
+            self.f.nullif(self.f.url_extract_query(arg), ""),
+        )
+
+    @visit_node.register(ops.ExtractQuery)
+    def visit_ExtractQuery(self, op, *, arg, key):
+        if key is None:
+            return self.f.url_extract_query(arg)
+        return self.f.url_extract_parameter(arg, key)
+
+    @visit_node.register(ops.Cot)
+    def visit_Cot(self, op, *, arg):
+        return 1.0 / self.f.tan(arg)
+
+    @visit_node.register(ops.StringAscii)
+    def visit_StringAscii(self, op, *, arg):
+        return self.f.codepoint(
+            sge.Cast(
+                this=self.f.substr(arg, 1, 2),
+                to=sge.DataType(
+                    this=sge.DataType.Type.VARCHAR,
+                    expressions=[sge.DataTypeParam(this=sge.convert(1))],
+                ),
+            )
+        )
+
+    @visit_node.register(ops.ArrayStringJoin)
+    def visit_ArrayStringJoin(self, op, *, sep, arg):
+        return self.f.array_join(arg, sep)
+
+    @visit_node.register(ops.First)
+    def visit_First(self, op, *, arg, where):
+        return self.f.element_at(self.agg.array_agg(arg, where=where), 1)
+
+    @visit_node.register(ops.Last)
+    def visit_Last(self, op, *, arg, where):
+        return self.f.element_at(self.agg.array_agg(arg, where=where), -1)
+
+    @visit_node.register(ops.ArrayZip)
+    def visit_ArrayZip(self, op, *, arg):
+        max_zip_arguments = 5
+        chunks = (
+            (len(chunk), self.f.zip(*chunk) if len(chunk) > 1 else chunk[0])
+            for chunk in toolz.partition_all(max_zip_arguments, arg)
+        )
+
+        def combine_zipped(left, right):
+            left_n, left_chunk = left
+            x, y = map(sg.to_identifier, "xy")
+
+            lhs = list(map(x.__getitem__, range(left_n))) if left_n > 1 else [x]
+
+            right_n, right_chunk = right
+            rhs = list(map(y.__getitem__, range(right_n))) if right_n > 1 else [y]
+
+            zipped_chunk = self.f.zip_with(
+                left_chunk,
+                right_chunk,
+                sge.Lambda(this=self.f.row(*lhs, *rhs), expressions=[x, y]),
+            )
+            return left_n + right_n, zipped_chunk
+
+        all_n, chunk = reduce(combine_zipped, chunks)
+        assert all_n == len(op.dtype.value_type)
+        return chunk
+
+    @visit_node.register(ops.ExtractMicrosecond)
+    def visit_ExtractMicrosecond(self, op, *, arg):
+        # trino only seems to store milliseconds, but the result of formatting
+        # always pads the right with 000
+        return self.cast(self.f.date_format(arg, "%f"), dt.int32)
+
+    @visit_node.register(ops.TimeDelta)
+    @visit_node.register(ops.DateDelta)
+    @visit_node.register(ops.TimestampDelta)
+    def visit_TemporalDelta(self, op, *, part, left, right):
+        # trino truncates _after_ the delta, whereas many other backends
+        # truncate each operand
+        dialect = self.dialect
+        return self.f.date_diff(
+            part,
+            self.f.date_trunc(part, right, dialect=dialect),
+            self.f.date_trunc(part, left, dialect=dialect),
+            dialect=dialect,
+        )
+
+    @visit_node.register(ops.IntervalFromInteger)
+    def visit_IntervalFromInteger(self, op, *, arg, unit):
+        unit = op.unit.short
+        if unit in ("Y", "Q", "M", "W"):
+            raise com.UnsupportedOperationError(f"Interval unit {unit!r} not supported")
+        return self.f.parse_duration(
+            self.f.concat(
+                self.cast(arg, dt.String(nullable=op.arg.dtype.nullable)), unit.lower()
             )
-            for row in op.data.to_frame().itertuples(index=False)
-        ]
-        columns = translator._schema_to_sqlalchemy_columns(op.schema)
-        return sa.values(*columns, 
name=op.name).data(rows).select().subquery()
+        )
+
+    @visit_node.register(ops.TimestampRange)
+    @visit_node.register(ops.IntegerRange)
+    def visit_Range(self, op, *, start, stop, step):
+        def zero_value(dtype):
+            if dtype.is_interval():
+                # the unit doesn't matter here, because e.g. 0d = 0s
+                return self.f.parse_duration("0s")
+            return 0
+
+        def interval_sign(v):
+            zero = self.f.parse_duration("0s")
+            return sge.Case(
+                ifs=[
+                    self.if_(v.eq(zero), 0),
+                    self.if_(v < zero, -1),
+                    self.if_(v > zero, 1),
+                ]
+            )
+
+        def _sign(value, dtype):
+            if dtype.is_interval():
+                return interval_sign(value)
+            return self.f.sign(value)
+
+        step_dtype = op.step.dtype
+        zero = zero_value(step_dtype)
+        return self.if_(
+            sg.and_(
+                self.f.nullif(step, zero).is_(sg.not_(NULL)),
+                _sign(step, step_dtype).eq(_sign(stop - start, step_dtype)),
+            ),
+            self.f.array_remove(self.f.sequence(start, stop, step), stop),
+            self.f.array(),
+        )
+
+    @visit_node.register(ops.ArrayIndex)
+    def visit_ArrayIndex(self, op, *, arg, index):
+        return self.f.element_at(arg, index + 1)
+
+    @visit_node.register(ops.Cast)
+    def visit_Cast(self, op, *, arg, to):
+        from_ = op.arg.dtype
+        if from_.is_integer() and to.is_interval():
+            return self.visit_IntervalFromInteger(
+                ops.IntervalFromInteger(op.arg, unit=to.unit),
+                arg=arg,
+                unit=to.unit,
+            )
+        elif from_.is_integer() and to.is_timestamp():
+            return self.f.from_unixtime(arg, to.timezone or "UTC")
+        return super().visit_Cast(op, arg=arg, to=to)
+
+    @visit_node.register(ops.CountDistinctStar)
+    def visit_CountDistinctStar(self, op, *, arg, where):
+        make_col = partial(sg.column, table=arg.alias_or_name, quoted=self.quoted)
+        row = self.f.row(*map(make_col, op.arg.schema.names))
+        return self.agg.count(sge.Distinct(expressions=[row]), where=where)
+
+    @visit_node.register(ops.ArrayConcat)
+    def visit_ArrayConcat(self, op, *, arg):
+        return self.f.concat(*arg)
+
+    @visit_node.register(ops.StringContains)
+    def visit_StringContains(self, op, *, haystack, needle):
+        return self.f.strpos(haystack, needle) > 0
+
+    @visit_node.register(ops.RegexExtract)
+    def visit_RegexExtract(self, op, *, arg, pattern, index):
+        # sqlglot doesn't support the third `group` argument for trino so work
+        # around that limitation using an anonymous function
+        return sge.Anonymous(this="regexp_extract", expressions=[arg, pattern, index])
+
+    @visit_node.register(ops.Quantile)
+    @visit_node.register(ops.MultiQuantile)
+    @visit_node.register(ops.Median)
+    @visit_node.register(ops.RowID)
+    @visit_node.register(ops.TimestampBucket)
+    def visit_Undefined(self, op, **kw):
+        return super().visit_Undefined(op, **kw)
+
+
+_SIMPLE_OPS = {
+    ops.Pi: "pi",
+    ops.E: "e",
+    ops.RegexReplace: "regexp_replace",
+    ops.Map: "map",
+    ops.MapKeys: "map_keys",
+    ops.MapLength: "cardinality",
+    ops.MapMerge: "map_concat",
+    ops.MapValues: "map_values",
+    ops.Log2: "log2",
+    ops.Log10: "log10",
+    ops.IsNan: "is_nan",
+    ops.IsInf: "is_infinite",
+    ops.StringToTimestamp: "date_parse",
+    ops.Strftime: "date_format",
+    ops.ExtractEpochSeconds: "to_unixtime",
+    ops.ExtractWeekOfYear: "week_of_year",
+    ops.ExtractDayOfYear: "day_of_year",
+    ops.ExtractMillisecond: "millisecond",
+    ops.ArrayUnion: "array_union",
+    ops.ArrayRemove: "array_remove",
+    ops.ArrayFlatten: "flatten",
+    ops.ArraySort: "array_sort",
+    ops.ArrayDistinct: "array_distinct",
+    ops.ArrayLength: "cardinality",
+    ops.ArrayCollect: "array_agg",
+    ops.ArrayIntersect: "array_intersect",
+    ops.BitAnd: "bitwise_and_agg",
+    ops.BitOr: "bitwise_or_agg",
+    ops.TypeOf: "typeof",
+    ops.Levenshtein: "levenshtein_distance",
+    ops.ExtractProtocol: "url_extract_protocol",
+    ops.ExtractHost: "url_extract_host",
+    ops.ExtractPath: "url_extract_path",
+    ops.ExtractFragment: "url_extract_fragment",
+    ops.RPad: "rpad",
+    ops.LPad: "lpad",
+    ops.ArrayPosition: "array_position",
+}
+
+for _op, _name in _SIMPLE_OPS.items():
+    assert isinstance(_op, type), type(_op)
+    if issubclass(_op, ops.Reduction):
+
+        @TrinoCompiler.visit_node.register(_op)
+        def _fmt(self, op, *, _name: str = _name, where, **kw):
+            return self.agg[_name](*kw.values(), where=where)
+
+    else:
+
+        @TrinoCompiler.visit_node.register(_op)
+        def _fmt(self, op, *, _name: str = _name, **kw):
+            return self.f[_name](*kw.values())
+
+    setattr(TrinoCompiler, f"visit_{_op.__name__}", _fmt)
 
-class TrinoSQLCompiler(AlchemyCompiler):
-    cheap_in_memory_tables = False
-    translator_class = TrinoSQLExprTranslator
-    null_limit = sa.literal_column("ALL")
-    table_set_formatter_class = TrinoTableSetFormatter
+del _op, _name, _fmt
diff --git a/ibis/backends/trino/converter.py b/ibis/backends/trino/converter.py
new file mode 100644
index 000000000000..5488b6083219
--- /dev/null
+++ b/ibis/backends/trino/converter.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+import datetime
+
+from ibis.formats.pandas import PandasData
+
+
+class TrinoPandasData(PandasData):
+    @classmethod
+    def convert_Interval(cls, s, dtype, pandas_dtype):
+        def parse_trino_timedelta(value):
+            # format is 'days hour:minute:second.millisecond'
+            days, rest = value.split(" ", 1)
+            hms, millis = rest.split(".", 1)
+            hours, minutes, seconds = hms.split(":")
+            return datetime.timedelta(
+                days=int(days),
+                hours=int(hours),
+                minutes=int(minutes),
+                seconds=int(seconds),
+                milliseconds=int(millis),
+            )
+
+        return s.map(parse_trino_timedelta, na_action="ignore")
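To illustrate the converter just added, a small sketch (the interval string is an assumed sample value; `convert_Interval` does not use its dtype arguments in this implementation, so `None` placeholders suffice):

import datetime

import pandas as pd

from ibis.backends.trino.converter import TrinoPandasData

s = pd.Series(["1 02:03:04.500", None])
# Trino renders day-to-second intervals as 'days hour:minute:second.millisecond'
out = TrinoPandasData.convert_Interval(s, None, None)
assert out[0] == datetime.timedelta(
    days=1, hours=2, minutes=3, seconds=4, milliseconds=500
)
assert pd.isna(out[1])  # na_action="ignore" passes nulls through untouched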
diff --git a/ibis/backends/trino/datatypes.py b/ibis/backends/trino/datatypes.py
deleted file mode 100644
index d34bc2c81997..000000000000
--- a/ibis/backends/trino/datatypes.py
+++ /dev/null
@@ -1,149 +0,0 @@
-from __future__ import annotations
-
-from datetime import time, timedelta
-from typing import Any
-
-import sqlalchemy.types as sat
-import trino.client
-from sqlalchemy.ext.compiler import compiles
-from trino.sqlalchemy.datatype import DOUBLE, JSON, MAP, TIMESTAMP
-from trino.sqlalchemy.datatype import ROW as _ROW
-
-import ibis.expr.datatypes as dt
-from ibis.backends.base.sql.alchemy.datatypes import AlchemyType
-from ibis.backends.base.sqlglot.datatypes import TrinoType as SqlglotTrinoType
-
-
-class ROW(_ROW):
-    _result_is_tuple = hasattr(trino.client, "NamedRowTuple")
-
-    def result_processor(self, dialect, coltype: str) -> None:
-        if not coltype.lower().startswith("row"):
-            return None
-
-        def process(
-            value,
-            result_is_tuple: bool = self._result_is_tuple,
-            names: tuple[str, ...] = tuple(name for name, _ in self.attr_types),
-        ) -> dict[str, Any] | None:
-            if value is None or not result_is_tuple:
-                return value
-            else:
-                return dict(zip(names, value))
-
-        return process
-
-
-class INTERVAL(sat.Interval):
-    def result_processor(self, dialect, coltype: str) -> None:
-        def process(value):
-            if value is None:
-                return value
-
-            # TODO: support year-month intervals
-            days, duration = value.split(" ", 1)
-            t = time.fromisoformat(duration)
-            return timedelta(
-                days=int(days),
-                hours=t.hour,
-                minutes=t.minute,
-                seconds=t.second,
-                microseconds=t.microsecond,
-            )
-
-        return process
-
-
-@compiles(TIMESTAMP)
-def compiles_timestamp(typ, compiler, **kw):
-    result = "TIMESTAMP"
-
-    if (prec := typ.precision) is not None:
-        result += f"({prec:d})"
-
-    if typ.timezone:
-        result += " WITH TIME ZONE"
-
-    return result
-
-
-@compiles(ROW)
-def _compiles_row(element, compiler, **kw):
-    # TODO: @compiles should live in the dialect
-    quote = compiler.dialect.identifier_preparer.quote
-    content = ", ".join(
-        f"{quote(field)} {compiler.process(typ, **kw)}"
-        for field, typ in element.attr_types
-    )
-    return f"ROW({content})"
-
-
-@compiles(MAP)
-def compiles_map(typ, compiler, **kw):
-    # TODO: @compiles should live in the dialect
-    key_type = compiler.process(typ.key_type, **kw)
-    value_type = compiler.process(typ.value_type, **kw)
-    return f"MAP({key_type}, {value_type})"
-
-
-@compiles(DOUBLE)
-@compiles(sat.REAL, "trino")
-def _floating(element, compiler, **kw):
-    return type(element).__name__.upper()
-
-
-class TrinoType(AlchemyType):
-    dialect = "trino"
-    source_types = {
-        DOUBLE: dt.Float64,
-        sat.REAL: dt.Float32,
-        JSON: dt.JSON,
-    }
-
-    @classmethod
-    def to_ibis(cls, typ, nullable=True):
-        if dtype := cls.source_types.get(type(typ)):
-            return dtype(nullable=nullable)
-        elif isinstance(typ, sat.NUMERIC):
-            return dt.Decimal(typ.precision or 18, typ.scale or 3, nullable=nullable)
-        elif isinstance(typ, sat.ARRAY):
-            value_dtype = cls.to_ibis(typ.item_type)
-            return dt.Array(value_dtype, nullable=nullable)
-        elif isinstance(typ, ROW):
-            fields = ((k, cls.to_ibis(v)) for k, v in typ.attr_types)
-            return dt.Struct.from_tuples(fields, nullable=nullable)
-        elif isinstance(typ, MAP):
-            return dt.Map(
-                cls.to_ibis(typ.key_type),
-                cls.to_ibis(typ.value_type),
-                nullable=nullable,
-            )
-        elif isinstance(typ, TIMESTAMP):
-            return dt.Timestamp(
-                timezone="UTC" if typ.timezone else None,
-                scale=typ.precision,
-                nullable=nullable,
-            )
-        else:
-            return super().to_ibis(typ, nullable=nullable)
-
-    @classmethod
-    def from_ibis(cls, dtype):
-        if isinstance(dtype, dt.Float64):
-            return DOUBLE()
-        elif isinstance(dtype, dt.Float32):
-            return sat.REAL()
-        elif dtype.is_string():
-            return sat.VARCHAR()
-        elif dtype.is_struct():
-            return ROW((name, cls.from_ibis(typ)) for name, typ in dtype.fields.items())
-        elif dtype.is_map():
-            return MAP(cls.from_ibis(dtype.key_type), cls.from_ibis(dtype.value_type))
-        elif dtype.is_timestamp():
-            return TIMESTAMP(precision=dtype.scale, timezone=bool(dtype.timezone))
-        else:
-            return super().from_ibis(dtype)
-
-    @classmethod
-    def from_string(cls, type_string, nullable=True):
-        return SqlglotTrinoType.from_string(type_string, nullable=nullable)
diff --git a/ibis/backends/trino/registry.py b/ibis/backends/trino/registry.py
deleted file mode 100644
index 74bd99a7f69a..000000000000
--- a/ibis/backends/trino/registry.py
+++ /dev/null
@@ -1,600 +0,0 @@
-from __future__ import annotations
-
-import operator
-from functools import partial, reduce
-from typing import Literal
-
-import sqlalchemy as sa
-import toolz
-from sqlalchemy.ext.compiler import compiles
-from sqlalchemy.sql.expression import FunctionElement
-from trino.sqlalchemy.datatype import DOUBLE
-
-import ibis
-import ibis.common.exceptions as com
-import ibis.expr.datatypes as dt
-import ibis.expr.operations as ops
-from ibis.backends.base.sql.alchemy.registry import (
-    _literal as _alchemy_literal,
-)
-from ibis.backends.base.sql.alchemy.registry import (
-    array_filter,
-    array_map,
-    fixed_arity,
-    reduction,
-    sqlalchemy_operation_registry,
-    sqlalchemy_window_functions_registry,
-    try_cast,
-    unary,
-    varargs,
-)
-from ibis.backends.postgres.registry import _corr, _covar
-from ibis.backends.trino.datatypes import INTERVAL
-
-operation_registry = sqlalchemy_operation_registry.copy()
-operation_registry.update(sqlalchemy_window_functions_registry)
-
-
-def _array(t, elements):
-    return t.translate(ibis.array(elements).op())
-
-
-class make_array(FunctionElement):
-    pass
-
-
-@compiles(make_array, "trino")
-def compile_make_array(element, compiler, **kw):
-    return f"ARRAY[{compiler.process(element.clauses, **kw)}]"
-
-
-def _literal(t, op):
-    value = op.value
-    dtype = op.dtype
-
-    if value is None:
-        return sa.null()
-    elif dtype.is_struct():
-        elements = (
-            t.translate(ops.Literal(element, dtype=field_type))
-            for element, field_type in zip(value.values(), dtype.types)
-        )
-        return sa.cast(sa.func.row(*elements), t.get_sqla_type(dtype))
-    elif dtype.is_array():
-        value_type = dtype.value_type
-        return make_array(
-            *(t.translate(ops.Literal(element, dtype=value_type)) for element in value)
-        )
-    elif dtype.is_map():
-        return sa.func.map(_array(t, value.keys()), _array(t, value.values()))
-    elif dtype.is_float64():
-        return sa.literal(float(value), type_=DOUBLE())
-    elif dtype.is_integer():
-        return sa.literal(int(value), type_=t.get_sqla_type(dtype))
-    elif dtype.is_timestamp():
-        return sa.cast(
-            sa.func.from_iso8601_timestamp(value.isoformat()), t.get_sqla_type(dtype)
-        )
-    elif dtype.is_date():
-        return sa.func.from_iso8601_date(value.isoformat())
-    elif dtype.is_time():
-        return sa.cast(sa.literal(str(value)), t.get_sqla_type(dtype))
-    elif dtype.is_interval():
-        return sa.literal_column(
-            f"INTERVAL '{value}' {dtype.resolution.upper()}", type_=INTERVAL
-        )
-
-    return _alchemy_literal(t, op)
-
-
-def _arbitrary(t, op):
-    if op.how != "first":
-        raise com.UnsupportedOperationError(
-            'Trino only supports how="first" for `arbitrary` reduction'
-        )
-    return reduction(sa.func.arbitrary)(t, op)
-
-
-def _json_get_item(t, op):
-    arg = t.translate(op.arg)
-    index = t.translate(op.index)
-    fmt = "%d" if op.index.dtype.is_integer() else '"%s"'
-    return sa.func.json_extract(arg, sa.func.format(f"$[{fmt}]", index))
-
-
-def _group_concat(t, op):
-    if not isinstance(op.sep, ops.Literal):
-        raise com.UnsupportedOperationError(
-            "Trino group concat separator must be a literal value"
-        )
-
-    arg = sa.func.array_agg(t.translate(op.arg))
-    if (where := op.where) is not None:
-        arg = arg.filter(t.translate(where))
-    return sa.func.array_join(arg, t.translate(op.sep))
-
-
-def _array_column(t, op):
-    args = ", ".join(
-        str(t.translate(arg).compile(compile_kwargs={"literal_binds": True}))
-        for arg in op.cols
-    )
-    return sa.literal_column(f"ARRAY[{args}]", type_=t.get_sqla_type(op.dtype))
-
-
-_truncate_precisions = {
-    # ms unit is not yet officially documented in Trino's public documentation,
-    # but it just works.
- "ms": "millisecond", - "s": "second", - "m": "minute", - "h": "hour", - "D": "day", - "W": "week", - "M": "month", - "Q": "quarter", - "Y": "year", -} - - -def _timestamp_truncate(t, op): - sa_arg = t.translate(op.arg) - try: - precision = _truncate_precisions[op.unit.short] - except KeyError: - raise com.UnsupportedOperationError(f"Unsupported truncate unit {op.unit!r}") - return sa.func.date_trunc(precision, sa_arg) - - -def _timestamp_from_unix(t, op): - arg, unit = op.args - arg = t.translate(arg) - - unit_short = unit.short - if unit_short == "ms": - try: - arg //= 1_000 - except TypeError: - arg = sa.func.floor(arg / 1_000) - res = sa.func.from_unixtime(arg) - elif unit_short == "s": - res = sa.func.from_unixtime(arg) - elif unit_short == "us": - res = sa.func.from_unixtime_nanos((arg - arg % 1_000_000) * 1_000) - elif unit_short == "ns": - res = sa.func.from_unixtime_nanos(arg - arg % 1_000_000_000) - else: - raise com.UnsupportedOperationError(f"{unit!r} unit is not supported") - return sa.cast(res, t.get_sqla_type(op.dtype)) - - -if_ = getattr(sa.func, "if") - - -def _neg_idx_to_pos(array, idx): - arg_length = sa.func.cardinality(array) - return if_(idx < 0, arg_length + sa.func.greatest(idx, -arg_length), idx) - - -def _array_slice(t, op): - arg = t.translate(op.arg) - - arg_length = sa.func.cardinality(arg) - - if (start := op.start) is None: - start = 0 - else: - start = sa.func.least(arg_length, _neg_idx_to_pos(arg, t.translate(start))) - - if (stop := op.stop) is None: - stop = arg_length - else: - stop = _neg_idx_to_pos(arg, t.translate(stop)) - - length = stop - start - return sa.func.slice(arg, start + 1, length, type_=arg.type) - - -def _extract_url_query(t, op): - arg = t.translate(op.arg) - key = op.key - if key is None: - result = sa.func.url_extract_query(arg) - else: - result = sa.func.url_extract_parameter(arg, t.translate(key)) - return sa.func.nullif(result, "") - - -def _round(t, op): - arg = t.translate(op.arg) - if (digits := op.digits) is not None: - return sa.func.round(arg, t.translate(digits)) - return sa.func.round(arg) - - -def _unnest(t, op): - arg = op.arg - name = arg.name - row_type = op.arg.dtype.value_type - names = getattr(row_type, "names", (name,)) - rd = sa.func.unnest(t.translate(arg)).table_valued(*names).render_derived() - # when unnesting a single column, unwrap the single ROW field access that - # would otherwise be generated, but keep the ROW if the array's element - # type is struct - if not row_type.is_struct(): - assert ( - len(names) == 1 - ), f"got non-struct dtype {row_type} with more than one name: {len(names)}" - return rd.c[0] - row = sa.func.row(*(rd.c[name] for name in names)) - return sa.cast(row, t.get_sqla_type(row_type)) - - -def _ifelse(t, op): - return if_( - t.translate(op.bool_expr), - t.translate(op.true_expr), - t.translate(op.false_null_expr), - type_=t.get_sqla_type(op.dtype), - ) - - -def _cot(t, op): - arg = t.translate(op.arg) - return 1.0 / sa.func.tan(arg, type_=t.get_sqla_type(op.arg.dtype)) - - -@compiles(array_map, "trino") -def compiles_list_apply(element, compiler, **kw): - *args, signature, result = map(partial(compiler.process, **kw), element.clauses) - return f"transform({', '.join(args)}, {signature} -> {result})" - - -def _array_map(t, op): - return array_map( - t.translate(op.arg), sa.literal_column(f"({op.param})"), t.translate(op.body) - ) - - -@compiles(array_filter, "trino") -def compiles_list_filter(element, compiler, **kw): - *args, signature, result = map(partial(compiler.process, **kw), 
-    *args, signature, result = map(partial(compiler.process, **kw), element.clauses)
-    return f"filter({', '.join(args)}, {signature} -> {result})"
-
-
-def _array_filter(t, op):
-    return array_filter(
-        t.translate(op.arg), sa.literal_column(f"({op.param})"), t.translate(op.body)
-    )
-
-
-def _first_last(t, op, *, offset: Literal[-1, 1]):
-    return sa.func.element_at(t._reduction(sa.func.array_agg, op), offset)
-
-
-def _zip(t, op):
-    # more than one chunk means more than 5 arguments to zip, which trino
-    # doesn't support
-    #
-    # help trino out by reducing in chunks of 5 using zip_with
-    max_zip_arguments = 5
-    chunks = (
-        (len(chunk), sa.func.zip(*chunk) if len(chunk) > 1 else chunk[0])
-        for chunk in toolz.partition_all(max_zip_arguments, map(t.translate, op.arg))
-    )
-
-    def combine_zipped(left, right):
-        left_n, left_chunk = left
-        lhs = (
-            ", ".join(f"x[{i:d}]" for i in range(1, left_n + 1)) if left_n > 1 else "x"
-        )
-
-        right_n, right_chunk = right
-        rhs = (
-            ", ".join(f"y[{i:d}]" for i in range(1, right_n + 1))
-            if right_n > 1
-            else "y"
-        )
-
-        zipped_chunk = sa.func.zip_with(
-            left_chunk, right_chunk, sa.literal_column(f"(x, y) -> ROW({lhs}, {rhs})")
-        )
-        return left_n + right_n, zipped_chunk
-
-    all_n, chunk = reduce(combine_zipped, chunks)
-
-    dtype = op.dtype
-
-    assert all_n == len(dtype.value_type)
-
-    return sa.type_coerce(chunk, t.get_sqla_type(dtype))
-
-
-@compiles(try_cast, "trino")
-def compiles_try_cast(element, compiler, **kw):
-    return "TRY_CAST({} AS {})".format(
-        compiler.process(element.clauses.clauses[0], **kw),
-        compiler.visit_typeclause(element),
-    )
-
-
-def _try_cast(t, op):
-    arg = t.translate(op.arg)
-    to = t.get_sqla_type(op.to)
-    return try_cast(arg, type_=to)
-
-
-def _array_intersect(t, op):
-    x = ops.Argument(name="x", shape=op.left.shape, dtype=op.left.dtype.value_type)
-    return t.translate(
-        ops.ArrayFilter(op.left, param=x.param, body=ops.ArrayContains(op.right, x))
-    )
-
-
-_temporal_delta = fixed_arity(
-    lambda part, left, right: sa.func.date_diff(
-        part, sa.func.date_trunc(part, right), sa.func.date_trunc(part, left)
-    ),
-    3,
-)
-
-
-def _interval_from_integer(t, op):
-    unit = op.unit.short
-    if unit in ("Y", "Q", "M", "W"):
-        raise com.UnsupportedOperationError(f"Interval unit {unit!r} not supported")
-    arg = sa.func.concat(
-        t.translate(ops.Cast(op.arg, dt.String(nullable=op.arg.dtype.nullable))),
-        unit.lower(),
-    )
-    return sa.type_coerce(sa.func.parse_duration(arg), INTERVAL)
-
-
-def zero_value(dtype):
-    if dtype.is_interval():
-        # the unit doesn't matter here, because e.g. 0d = 0s
-        return sa.func.parse_duration("0s")
-    return 0
-
-
-def interval_sign(v):
-    zero = sa.func.parse_duration("0s")
-    return sa.case((v == zero, 0), (v < zero, -1), (v > zero, 1))
-
-
-def _sign(value, dtype):
-    if dtype.is_interval():
-        return interval_sign(value)
-    return sa.func.sign(value)
-
-
-def _range(t, op):
-    start = t.translate(op.start)
-    stop = t.translate(op.stop)
-    step = t.translate(op.step)
-    satype = t.get_sqla_type(op.dtype)
-    zero = zero_value(op.step.dtype)
-    return if_(
-        sa.and_(
-            sa.func.nullif(step, zero).is_not(None),
-            _sign(step, op.step.dtype) == _sign(stop - start, op.step.dtype),
-        ),
-        sa.func.array_remove(
-            sa.func.sequence(start, stop, step, type_=satype), stop, type_=satype
-        ),
-        sa.literal_column("ARRAY[]"),
-    )
-
-
-operation_registry.update(
-    {
-        # conditional expressions
-        # static checks are not happy with using "if" as a property
-        ops.IfElse: _ifelse,
-        # boolean reductions
-        ops.Any: reduction(sa.func.bool_or),
-        ops.All: reduction(sa.func.bool_and),
-        ops.ArgMin: reduction(sa.func.min_by),
-        ops.ArgMax: reduction(sa.func.max_by),
-        # array ops
-        ops.Correlation: _corr,
-        ops.Covariance: _covar,
-        ops.ExtractMillisecond: unary(sa.func.millisecond),
-        ops.Arbitrary: _arbitrary,
-        ops.ApproxCountDistinct: reduction(sa.func.approx_distinct),
-        ops.ApproxMedian: reduction(lambda arg: sa.func.approx_percentile(arg, 0.5)),
-        ops.RegexExtract: fixed_arity(sa.func.regexp_extract, 3),
-        ops.RegexReplace: fixed_arity(sa.func.regexp_replace, 3),
-        ops.RegexSearch: fixed_arity(
-            lambda arg, pattern: sa.func.regexp_position(arg, pattern) != -1, 2
-        ),
-        ops.GroupConcat: _group_concat,
-        ops.BitAnd: reduction(sa.func.bitwise_and_agg),
-        ops.BitOr: reduction(sa.func.bitwise_or_agg),
-        ops.BitXor: reduction(
-            lambda arg: sa.func.reduce_agg(
-                arg,
-                0,
-                sa.text("(a, b) -> bitwise_xor(a, b)"),
-                sa.text("(a, b) -> bitwise_xor(a, b)"),
-            )
-        ),
-        ops.BitwiseAnd: fixed_arity(sa.func.bitwise_and, 2),
-        ops.BitwiseOr: fixed_arity(sa.func.bitwise_or, 2),
-        ops.BitwiseXor: fixed_arity(sa.func.bitwise_xor, 2),
-        ops.BitwiseLeftShift: fixed_arity(sa.func.bitwise_left_shift, 2),
-        ops.BitwiseRightShift: fixed_arity(sa.func.bitwise_right_shift, 2),
-        ops.BitwiseNot: unary(sa.func.bitwise_not),
-        ops.ArrayCollect: reduction(sa.func.array_agg),
-        ops.ArrayConcat: varargs(sa.func.concat),
-        ops.ArrayLength: unary(sa.func.cardinality),
-        ops.ArrayIndex: fixed_arity(
-            lambda arg, index: sa.func.element_at(arg, index + 1), 2
-        ),
-        ops.ArrayColumn: _array_column,
-        ops.ArrayRepeat: fixed_arity(
-            lambda arg, times: sa.func.flatten(sa.func.repeat(arg, times)), 2
-        ),
-        ops.ArraySlice: _array_slice,
-        ops.ArrayMap: _array_map,
-        ops.ArrayFilter: _array_filter,
-        ops.ArrayContains: fixed_arity(
-            lambda arr, el: if_(
-                arr != sa.null(),
-                sa.func.coalesce(sa.func.contains(arr, el), sa.false()),
-                sa.null(),
-            ),
-            2,
-        ),
-        ops.ArrayPosition: fixed_arity(
-            lambda lst, el: sa.func.array_position(lst, el) - 1, 2
-        ),
-        ops.ArrayDistinct: fixed_arity(sa.func.array_distinct, 1),
-        ops.ArraySort: fixed_arity(sa.func.array_sort, 1),
-        ops.ArrayRemove: fixed_arity(sa.func.array_remove, 2),
-        ops.ArrayUnion: fixed_arity(sa.func.array_union, 2),
-        ops.ArrayFlatten: unary(sa.func.flatten),
-        ops.JSONGetItem: _json_get_item,
-        ops.ExtractDayOfYear: unary(sa.func.day_of_year),
-        ops.ExtractWeekOfYear: unary(sa.func.week_of_year),
-        ops.DayOfWeekIndex: unary(
-            lambda arg: sa.cast(
-                sa.cast(sa.func.day_of_week(arg) + 6, sa.SMALLINT) % 7, sa.SMALLINT
-            )
-        ),
-        ops.DayOfWeekName: unary(lambda arg: sa.func.date_format(arg, "%W")),
-        ops.ExtractEpochSeconds: unary(sa.func.to_unixtime),
-        ops.Translate: fixed_arity(sa.func.translate, 3),
-        ops.StrRight: fixed_arity(lambda arg, nchars: sa.func.substr(arg, -nchars), 2),
-        ops.StringSplit: fixed_arity(sa.func.split, 2),
-        ops.Repeat: fixed_arity(
-            lambda value, count: sa.func.array_join(sa.func.repeat(value, count), ""), 2
-        ),
-        ops.DateTruncate: _timestamp_truncate,
-        ops.TimestampTruncate: _timestamp_truncate,
-        ops.DateFromYMD: fixed_arity(
-            lambda y, m, d: sa.func.from_iso8601_date(
-                sa.func.format("%04d-%02d-%02d", y, m, d)
-            ),
-            3,
-        ),
-        ops.TimeFromHMS: fixed_arity(
-            lambda h, m, s: sa.cast(sa.func.format("%02d:%02d:%02d", h, m, s), sa.TIME),
-            3,
-        ),
-        ops.TimestampFromYMDHMS: fixed_arity(
-            lambda y, mo, d, h, m, s: sa.cast(
-                sa.func.from_iso8601_timestamp(
-                    sa.func.format("%04d-%02d-%02dT%02d:%02d:%02d", y, mo, d, h, m, s)
-                ),
-                sa.TIMESTAMP(timezone=False),
-            ),
-            6,
-        ),
-        ops.Strftime: fixed_arity(sa.func.date_format, 2),
-        ops.StringToTimestamp: fixed_arity(sa.func.date_parse, 2),
-        ops.TimestampNow: fixed_arity(sa.func.now, 0),
-        ops.TimestampFromUNIX: _timestamp_from_unix,
-        ops.StructField: lambda t, op: t.translate(op.arg).op(".")(sa.text(op.field)),
-        ops.StructColumn: lambda t, op: sa.cast(
-            sa.func.row(*map(t.translate, op.values)), t.get_sqla_type(op.dtype)
-        ),
-        ops.Literal: _literal,
-        ops.IsNan: unary(sa.func.is_nan),
-        ops.IsInf: unary(sa.func.is_infinite),
-        ops.Log: fixed_arity(lambda arg, base: sa.func.log(base, arg), 2),
-        ops.Log2: unary(sa.func.log2),
-        ops.Log10: unary(sa.func.log10),
-        ops.MapLength: unary(sa.func.cardinality),
-        ops.MapGet: fixed_arity(
-            lambda arg, key, default: sa.func.coalesce(
-                sa.func.element_at(arg, key), default
-            ),
-            3,
-        ),
-        ops.MapKeys: unary(sa.func.map_keys),
-        ops.MapValues: unary(sa.func.map_values),
-        ops.Map: fixed_arity(sa.func.map, 2),
-        ops.MapMerge: fixed_arity(sa.func.map_concat, 2),
-        ops.MapContains: fixed_arity(
-            lambda arg, key: sa.func.contains(sa.func.map_keys(arg), key), 2
-        ),
-        ops.ExtractProtocol: unary(
-            lambda arg: sa.func.nullif(sa.func.url_extract_protocol(arg), "")
-        ),
-        ops.ExtractHost: unary(
-            lambda arg: sa.func.nullif(sa.func.url_extract_host(arg), "")
-        ),
-        ops.ExtractPath: unary(
-            lambda arg: sa.func.nullif(sa.func.url_extract_path(arg), "")
-        ),
-        ops.ExtractFragment: unary(
-            lambda arg: sa.func.nullif(sa.func.url_extract_fragment(arg), "")
-        ),
-        ops.ExtractFile: unary(
-            lambda arg: sa.func.concat_ws(
-                "?",
-                sa.func.nullif(sa.func.url_extract_path(arg), ""),
-                sa.func.nullif(sa.func.url_extract_query(arg), ""),
-            )
-        ),
-        ops.ExtractQuery: _extract_url_query,
-        ops.Cot: _cot,
-        ops.Round: _round,
-        ops.Pi: fixed_arity(sa.func.pi, 0),
-        ops.E: fixed_arity(sa.func.e, 0),
-        ops.Quantile: reduction(sa.func.approx_percentile),
-        ops.MultiQuantile: reduction(sa.func.approx_percentile),
-        ops.StringAscii: unary(
-            lambda d: sa.func.codepoint(
-                sa.func.cast(sa.func.substr(d, 1, 2), sa.VARCHAR(1))
-            )
-        ),
-        ops.TypeOf: unary(sa.func.typeof),
-        ops.Unnest: _unnest,
-        ops.ArrayStringJoin: fixed_arity(
-            lambda sep, arr: sa.func.array_join(arr, sep), 2
-        ),
-        ops.StartsWith: fixed_arity(sa.func.starts_with, 2),
-        ops.Argument: lambda _, op: sa.literal_column(op.param),
-        ops.First: partial(_first_last, offset=1),
-        ops.Last: partial(_first_last, offset=-1),
-        ops.ArrayZip: _zip,
-        ops.TryCast: _try_cast,
-        ops.ExtractMicrosecond: fixed_arity(
-            # trino only seems to store milliseconds, but the result of
-            # formatting always pads the right with 000
-            lambda arg: sa.cast(sa.func.date_format(arg, "%f"), sa.INTEGER()),
-            1,
-        ),
-        ops.Levenshtein: fixed_arity(sa.func.levenshtein_distance, 2),
-        ops.ArrayIntersect: _array_intersect,
-        # trino truncates _after_ the delta, whereas many other backends
-        # truncates each operand
-        ops.TimeDelta: _temporal_delta,
-        ops.DateDelta: _temporal_delta,
-        ops.TimestampDelta: _temporal_delta,
-        ops.TimestampAdd: fixed_arity(operator.add, 2),
-        ops.TimestampSub: fixed_arity(operator.sub, 2),
-        ops.TimestampDiff: fixed_arity(lambda x, y: sa.type_coerce(x - y, INTERVAL), 2),
-        ops.DateAdd: fixed_arity(operator.add, 2),
-        ops.DateSub: fixed_arity(operator.sub, 2),
-        ops.DateDiff: fixed_arity(lambda x, y: sa.type_coerce(x - y, INTERVAL), 2),
-        ops.IntervalAdd: fixed_arity(operator.add, 2),
-        ops.IntervalSubtract: fixed_arity(operator.sub, 2),
-        ops.IntervalFromInteger: _interval_from_integer,
-        ops.IntegerRange: _range,
-        ops.TimestampRange: _range,
-        ops.RegexSplit: fixed_arity(sa.func.regexp_split, 2),
-    }
-)
-
-_invalid_operations = {
-    # ibis.expr.operations.reductions
-    ops.MultiQuantile,
-    ops.Quantile,
-}
-
-operation_registry = {
-    k: v for k, v in operation_registry.items() if k not in _invalid_operations
-}
diff --git a/ibis/backends/trino/tests/conftest.py b/ibis/backends/trino/tests/conftest.py
index 1f27b798761f..5abc5eaa859a 100644
--- a/ibis/backends/trino/tests/conftest.py
+++ b/ibis/backends/trino/tests/conftest.py
@@ -2,10 +2,11 @@
 
 import os
 import subprocess
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 import pytest
 import sqlglot as sg
+import sqlglot.expressions as sge
 
 import ibis
 import ibis.expr.datatypes as dt
@@ -41,10 +42,7 @@ class TestConf(ServiceBackendTest):
     supports_structs = True
     supports_map = True
     supports_tpch = True
-    deps = ("sqlalchemy", "trino.sqlalchemy")
-
-    _tpch_data_schema = "tpch.tiny"
-    _tpch_query_schema = "hive.ibis_sf1"
+    deps = ("trino",)
 
     def preload(self):
         # create buckets
@@ -86,7 +84,8 @@ def preload(self):
     def _transform_tpch_sql(self, parsed):
         def add_catalog_and_schema(node):
             if isinstance(node, sg.exp.Table):
-                catalog, db = self._tpch_query_schema.split(".")
+                catalog = "hive"
+                db = "ibis_sf1"
                 return node.__class__(
                     db=db,
                     catalog=catalog,
@@ -107,11 +106,10 @@ def load_tpch(self) -> None:
         to match the DuckDB TPC-H query conventions.
""" con = self.connection - query_schema = self._tpch_query_schema - data_schema = self._tpch_data_schema - database, schema = query_schema.split(".") + database = "hive" + schema = "ibis_sf1" - tables = con.list_tables(schema=self._tpch_data_schema) + tables = con.list_tables(schema="tiny", database="tpch") con.create_schema(schema, database=database, force=True) prefixes = {"partsupp": "ps"} @@ -124,7 +122,7 @@ def load_tpch(self) -> None: prefix = prefixes.get(table, table[0]) t = ( - con.table(table, schema=data_schema) + con.table(table, schema="tiny", database="tpch") .rename(f"{prefix}_{{}}".format) # https://github.com/trinodb/trino/issues/19477 .mutate( @@ -132,16 +130,29 @@ def load_tpch(self) -> None: ) ) - sql = ibis.to_sql(t, dialect="trino") - c.exec_driver_sql( - f"CREATE OR REPLACE VIEW {query_schema}.{table} AS {sql}" - ) + sql = sge.Create( + kind="VIEW", + this=sg.table(table, db=schema, catalog=database), + expression=self.connection._to_sqlglot(t), + replace=True, + ).sql("trino", pretty=True) + + c.execute(sql) + + def _load_data(self, **_: Any) -> None: + """Load test data into a backend.""" + with self.connection.begin() as cur: + for stmt in self.ddl_script: + cur.execute(stmt) def _tpch_table(self, name: str): - return self.connection.table( - self.default_identifier_case_fn(name), - schema=self._tpch_query_schema, + from ibis import _ + + table = self.connection.table( + self.default_identifier_case_fn(name), schema="ibis_sf1", database="hive" ) + table = table.mutate(s.across(s.of_type("double"), _.cast("decimal(15, 2)"))) + return table @property def test_files(self) -> Iterable[Path]: diff --git a/ibis/backends/trino/tests/test_client.py b/ibis/backends/trino/tests/test_client.py index 7b0f01142c86..31e40314c24e 100644 --- a/ibis/backends/trino/tests/test_client.py +++ b/ibis/backends/trino/tests/test_client.py @@ -41,7 +41,8 @@ def test_table_properties(tmp_name): ) assert t.schema() == schema with con.begin() as c: - ddl = c.exec_driver_sql(f"SHOW CREATE TABLE {tmp_name}").scalar() + c.execute(f"SHOW CREATE TABLE {tmp_name}") + [(ddl,)] = c.fetchall() assert "ORC" in ddl assert "bucketed_by" in ddl @@ -78,20 +79,20 @@ def test_con_source(source, expected): schema="default", source=source, ) - assert con.con.url.query["source"] == expected + assert con.con.source == expected @pytest.mark.parametrize( - ("schema", "table"), + ("database", "schema", "table"), [ # tables known to exist - ("system.metadata", "table_comments"), - ("tpcds.sf1", "store"), - ("tpch.sf1", "nation"), + ("system", "metadata", "table_comments"), + ("tpcds", "sf1", "store"), + ("tpch", "sf1", "nation"), ], ) -def test_cross_schema_table_access(con, schema, table): - t = con.table(table, schema=schema) +def test_cross_schema_table_access(con, database, schema, table): + t = con.table(table, schema=schema, database=database) assert t.count().execute() @@ -115,9 +116,8 @@ def geometric_mean(x) -> float: result_n, result = expr.execute().squeeze().tolist() with con.begin() as c: - expected_n, expected = c.exec_driver_sql( - "SELECT COUNT(*), GEOMETRIC_MEAN(price) FROM diamonds" - ).one() + c.execute("SELECT COUNT(*), GEOMETRIC_MEAN(price) FROM diamonds") + [(expected_n, expected)] = c.fetchall() # check the count assert result_n > 0 @@ -148,28 +148,14 @@ def test_create_table_timestamp(): assert table not in con.list_tables() -def test_table_access_from_connection_without_catalog_or_schema(): - con = ibis.trino.connect() - # can't use the `system` catalog to test here, because the trino 
-def test_table_access_from_connection_without_catalog_or_schema():
-    con = ibis.trino.connect()
-    # can't use the `system` catalog to test here, because the trino sqlalchemy
-    # dialect defaults to `system` if no catalog is passed, so it wouldn't be a
-    # useful test
-    assert con.current_database != "tpch"
-    assert con.current_schema is None
-
-    t = con.table("region", schema="tpch.sf1")
-
-    assert con.current_database != "tpch"
-    assert con.current_schema is None
-
-    assert t.count().execute()
-
-
 def test_table_access_database_schema(con):
     t = con.table("region", schema="sf1", database="tpch")
     assert t.count().execute()
 
-    with pytest.raises(exc.IbisError, match="Cannot specify both"):
+    with pytest.raises(exc.IbisError, match='Table not found: tpch."tpch.sf1".region'):
         con.table("region", schema="tpch.sf1", database="tpch")
 
-    with pytest.raises(exc.IbisError, match="Cannot specify both"):
+    with pytest.raises(
+        exc.IbisError, match='Table not found: system."tpch.sf1".region'
+    ):
         con.table("region", schema="tpch.sf1", database="system")
diff --git a/ibis/backends/trino/tests/test_datatypes.py b/ibis/backends/trino/tests/test_datatypes.py
index 85c435e117d3..fc7164fcfa87 100644
--- a/ibis/backends/trino/tests/test_datatypes.py
+++ b/ibis/backends/trino/tests/test_datatypes.py
@@ -4,7 +4,7 @@
 from pytest import param
 
 import ibis.expr.datatypes as dt
-from ibis.backends.trino.datatypes import TrinoType
+from ibis.backends.base.sqlglot.datatypes import TrinoType
 
 dtypes = [
     ("interval year to month", dt.Interval(unit="M")),
diff --git a/ibis/expr/rewrites.py b/ibis/expr/rewrites.py
index 840cb59459e7..e59be96407a5 100644
--- a/ibis/expr/rewrites.py
+++ b/ibis/expr/rewrites.py
@@ -18,6 +18,7 @@
 d = Namespace(deferred, module=ops)
 
+x = var("x")
 y = var("y")
 name = var("name")
 
diff --git a/poetry.lock b/poetry.lock
index 81fca6747794..4d3356873ac6 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -6682,7 +6682,6 @@ files = [
 python-dateutil = "*"
 pytz = "*"
 requests = ">=2.31.0"
-sqlalchemy = {version = ">=1.3", optional = true, markers = "extra == \"sqlalchemy\""}
 tzlocal = "*"
 
 [package.extras]
@@ -7329,10 +7328,10 @@ postgres = ["psycopg2", "sqlalchemy", "sqlalchemy-views"]
 pyspark = ["packaging", "pyspark", "sqlalchemy"]
 snowflake = ["packaging", "snowflake-connector-python"]
 sqlite = ["regex", "sqlalchemy", "sqlalchemy-views"]
-trino = ["sqlalchemy", "sqlalchemy-views", "trino"]
+trino = ["trino"]
 visualization = ["graphviz"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "cbcc7341639b579de656469a0f95dd7a4ffb5a8a1bc69a77520ef8b172bf0146"
+content-hash = "083f8f6a6d3dab493009395aabe051d1758183ba4e2588fc505aac883e3beafe"
diff --git a/pyproject.toml b/pyproject.toml
index 36383e14b4c7..46cfbc080f1a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -92,7 +92,7 @@ sqlalchemy-exasol = { version = ">=4.6.0", optional = true, extras = [
   "exasol",
 ] }
 sqlalchemy-views = { version = ">=0.3.1,<1", optional = true }
-trino = { version = ">=0.321,<1", optional = true, extras = ["sqlalchemy"] }
+trino = { version = ">=0.321,<1", optional = true }
 
 [tool.poetry.group.dev.dependencies]
 codespell = { version = ">=2.2.6,<3", extras = [
@@ -199,7 +199,7 @@ postgres = ["psycopg2", "sqlalchemy", "sqlalchemy-views"]
 pyspark = ["pyspark", "sqlalchemy", "packaging"]
 snowflake = ["snowflake-connector-python", "packaging"]
 sqlite = ["regex", "sqlalchemy", "sqlalchemy-views"]
-trino = ["trino", "sqlalchemy", "sqlalchemy-views"]
+trino = ["trino"]
 # non-backend extras
 visualization = ["graphviz"]
 decompiler = ["black"]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index b64415f46288..919981b8c1cc 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -255,7 +255,7 @@ tornado==6.4 ; python_version >= "3.10" and python_version < "3.13"
 tqdm==4.66.1 ; python_version >= "3.9" and python_version < "4.0"
 traitlets==5.14.0 ; python_version >= "3.9" and python_version < "4.0"
 traittypes==0.2.1 ; python_version >= "3.10" and python_version < "3.13"
-trino[sqlalchemy]==0.327.0 ; python_version >= "3.9" and python_version < "4.0"
+trino==0.327.0 ; python_version >= "3.9" and python_version < "4.0"
 trove-classifiers==2023.11.29 ; python_version >= "3.9" and python_version < "4.0"
 typing-extensions==4.9.0 ; python_version >= "3.9" and python_version < "4.0"
 tzdata==2023.4 ; python_version >= "3.9" and python_version < "4.0"

From cc5d283e47f279ec5f609f4fb827fe3a1b02e0fd Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Fri, 5 Jan 2024 11:51:19 -0500
Subject: [PATCH 2/3] fix(polars): force null sorting to match the rest of ibis

---
 ibis/backends/polars/compiler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py
index c5d79c362be0..050839e09b2a 100644
--- a/ibis/backends/polars/compiler.py
+++ b/ibis/backends/polars/compiler.py
@@ -234,9 +234,9 @@ def sort(op, **kw):
     by = [key.name for key in op.keys]
     descending = [key.descending for key in op.keys]
     try:
-        lf = lf.sort(by, descending=descending)
+        lf = lf.sort(by, descending=descending, nulls_last=True)
     except TypeError:  # pragma: no cover
-        lf = lf.sort(by, reverse=descending)  # pragma: no cover
+        lf = lf.sort(by, reverse=descending, nulls_last=True)  # pragma: no cover
     return lf
 

From 6ed20fad420467b50542e77fdca9164a872682ca Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Fri, 5 Jan 2024 11:53:43 -0500
Subject: [PATCH 3/3] test(pandas): ignore array size warning

---
 pyproject.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 46cfbc080f1a..10563d1ef1c1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -285,6 +285,8 @@ filterwarnings = [
   'ignore:`np\.bool` is a deprecated alias for the builtin `bool`:DeprecationWarning',
   # numpy, coming from a pandas call
   'ignore:In the future `np\.bool` will be defined as the corresponding NumPy scalar:FutureWarning',
+  # pandas by way of polars when comparing arrays
+  'ignore:The truth value of an empty array is ambiguous.:DeprecationWarning',
   # druid
   'ignore:Dialect druid.rest will not make use of SQL compilation caching:',
   # ibis
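
To make the intent of the polars fix in PATCH 2 concrete, a small sketch (assuming a polars version whose `sort` accepts the `nulls_last` keyword):

import polars as pl

lf = pl.LazyFrame({"x": [3, None, 1]})
# polars defaults to nulls-first on ascending sorts...
assert lf.sort("x").collect()["x"].to_list() == [None, 1, 3]
# ...while ibis expects nulls to sort last, hence nulls_last=True in the compiler
assert lf.sort("x", nulls_last=True).collect()["x"].to_list() == [1, 3, None]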