From a3bd85303c8400f7eaea102103dcc330d5bfd2dd Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 18 Mar 2024 15:31:55 -0500 Subject: [PATCH] fix(deps): bump sqlglot to pick up duckdb array fixes (#8682) --- .github/workflows/ibis-backends.yml | 8 ++++- ibis/__init__.py | 4 +-- ibis/backends/__init__.py | 4 +-- .../test_array_join_in_subquery/out.sql | 8 +++-- ibis/backends/druid/compiler.py | 4 ++- ibis/backends/duckdb/__init__.py | 4 +-- ibis/backends/duckdb/tests/test_geospatial.py | 2 ++ ibis/backends/flink/__init__.py | 6 ++-- ibis/backends/impala/compiler.py | 8 +++-- .../test_sql/test_is_parens/notnull/out.sql | 2 +- .../out.sql | 2 +- .../test_isnull_notnull/notnull/out.sql | 2 +- .../default/out.sql | 2 +- .../test_timestamp_from_integer/ms/out.sql | 2 +- .../test_timestamp_from_integer/us/out.sql | 2 +- ibis/backends/snowflake/compiler.py | 6 +++- ibis/backends/sql/__init__.py | 3 -- ibis/backends/sql/compiler.py | 2 +- ibis/backends/sqlite/compiler.py | 15 +++++++++- ibis/backends/sqlite/udf.py | 15 ++++++++-- .../test_many_subqueries/mssql/out.sql | 12 ++++---- .../test_cte_refs_in_topo_order/mssql/out.sql | 8 ++--- .../test_bug_duplicated_where/out.sql | 2 +- .../test_isnull_notnull/notnull/out.sql | 2 +- ibis/backends/tests/sql/test_select_sql.py | 2 +- ibis/backends/tests/test_array.py | 23 ++++++++++---- ibis/backends/tests/test_generic.py | 30 ++++++++++++++----- ibis/backends/tests/test_numeric.py | 13 ++++++++ ibis/backends/tests/test_temporal.py | 6 ++-- ibis/examples/tests/test_examples.py | 1 + ibis/expr/sql.py | 13 ++++---- ibis/tests/expr/mocks.py | 4 +-- ibis/tests/util.py | 4 +++ poetry.lock | 8 ++--- pyproject.toml | 2 +- requirements-dev.txt | 2 +- 36 files changed, 159 insertions(+), 74 deletions(-) diff --git a/.github/workflows/ibis-backends.yml b/.github/workflows/ibis-backends.yml index e4b30a7b78a9..5ef30235c026 100644 --- a/.github/workflows/ibis-backends.yml +++ b/.github/workflows/ibis-backends.yml @@ -61,12 +61,18 @@ jobs: extras: - duckdb - deltalake - - geospatial - examples - decompiler - polars additional_deps: - torch + # TODO: remove this duckdb job once the next duckdb_spatial is released + - name: duckdb + title: DuckDB + Geospatial + extras: + - geospatial + additional_deps: + - "duckdb==0.9.2" - name: clickhouse title: ClickHouse services: diff --git a/ibis/__init__.py b/ibis/__init__.py index 8c9a8af1c9b6..0c55b46be75b 100644 --- a/ibis/__init__.py +++ b/ibis/__init__.py @@ -97,7 +97,7 @@ def __getattr__(name: str) -> BaseBackend: # - has_operation # - add_operation # - _from_url - # - _to_sql + # - _to_sqlglot # # We also copy over the docstring from `do_connect` to the proxy `connect` # method, since that's where all the backend-specific kwargs are currently @@ -119,7 +119,7 @@ def connect(*args, **kwargs): proxy.add_operation = backend.add_operation proxy.name = name proxy._from_url = backend._from_url - proxy._to_sql = backend._to_sql + proxy._to_sqlglot = backend._to_sqlglot # Add any additional methods that should be exposed at the top level for name in getattr(backend, "_top_level_methods", ()): setattr(proxy, name, getattr(backend, name)) diff --git a/ibis/backends/__init__.py b/ibis/backends/__init__.py index feef3d84e2ec..0c6ec09cc4a8 100644 --- a/ibis/backends/__init__.py +++ b/ibis/backends/__init__.py @@ -926,8 +926,8 @@ def compile( """Compile an expression.""" return self.compiler.to_sql(expr, params=params) - def _to_sql(self, expr: ir.Expr, **kwargs) -> str: - """Convert an expression to a SQL string. + def _to_sqlglot(self, expr: ir.Expr, **kwargs) -> sg.exp.Expression: + """Convert an Ibis expression to a sqlglot expression. Called by `ibis.to_sql`; gives the backend an opportunity to generate nicer SQL for human consumption. diff --git a/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql b/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql index 9598809b7217..fdfc3523c1cc 100644 --- a/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql +++ b/ibis/backends/clickhouse/tests/snapshots/test_select/test_array_join_in_subquery/out.sql @@ -1,5 +1,7 @@ SELECT - "t0"."id" IN (SELECT - arrayJoin("t1"."ids") AS "ids" - FROM "way_view" AS "t1") AS "InSubquery(id)" + "t0"."id" IN ( + SELECT + arrayJoin("t1"."ids") AS "ids" + FROM "way_view" AS "t1" + ) AS "InSubquery(id)" FROM "node_view" AS "t0" \ No newline at end of file diff --git a/ibis/backends/druid/compiler.py b/ibis/backends/druid/compiler.py index 17bd5884066e..e9ec2211c846 100644 --- a/ibis/backends/druid/compiler.py +++ b/ibis/backends/druid/compiler.py @@ -88,7 +88,6 @@ class DruidCompiler(SQLGlotCompiler): ops.BitwiseRightShift: "bitwise_shift_right", ops.Modulus: "mod", ops.Power: "power", - ops.Log10: "log10", ops.ApproxCountDistinct: "approx_count_distinct", ops.StringContains: "contains_string", } @@ -99,6 +98,9 @@ def _aggregate(self, funcname: str, *args, where): return sg.exp.Filter(this=expr, expression=sg.exp.Where(this=where)) return expr + def visit_Log10(self, op, *, arg): + return self.f.anon.log10(arg) + def visit_Sum(self, op, *, arg, where): arg = self.if_(arg, 1, 0) if op.arg.dtype.is_boolean() else arg return self.agg.sum(arg, where=where) diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index a43169347410..a3cac30f6a9d 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -1371,7 +1371,7 @@ def to_parquet( """ self._run_pre_execute_hooks(expr) - query = self._to_sql(expr, params=params) + query = self.compile(expr, params=params) args = ["FORMAT 'parquet'", *(f"{k.upper()} {v!r}" for k, v in kwargs.items())] copy_cmd = f"COPY ({query}) TO {str(path)!r} ({', '.join(args)})" with self._safe_raw_sql(copy_cmd): @@ -1407,7 +1407,7 @@ def to_csv( """ self._run_pre_execute_hooks(expr) - query = self._to_sql(expr, params=params) + query = self.compile(expr, params=params) args = [ "FORMAT 'csv'", f"HEADER {int(header)}", diff --git a/ibis/backends/duckdb/tests/test_geospatial.py b/ibis/backends/duckdb/tests/test_geospatial.py index b6787e81ba17..3cca6f59e7d5 100644 --- a/ibis/backends/duckdb/tests/test_geospatial.py +++ b/ibis/backends/duckdb/tests/test_geospatial.py @@ -304,6 +304,8 @@ def test_literal_geospatial_inferred(con, shp, expected, snapshot): reason="nix on linux cannot download duckdb extensions or data due to sandboxing", ) def test_load_geo_example(con): + pytest.importorskip("pins") + t = ibis.examples.zones.fetch(backend=con) assert t.geom.type().is_geospatial() diff --git a/ibis/backends/flink/__init__.py b/ibis/backends/flink/__init__.py index 1fec5bd011c3..8cf9cd744814 100644 --- a/ibis/backends/flink/__init__.py +++ b/ibis/backends/flink/__init__.py @@ -333,8 +333,10 @@ def compile( """Compile an Ibis expression to Flink.""" return super().compile(expr, params=params) # Discard `limit` and other kwargs. - def _to_sql(self, expr: ir.Expr, **kwargs: Any) -> str: - return str(self.compile(expr, **kwargs)) + def _to_sqlglot( + self, expr: ir.Expr, params: Mapping[ir.Expr, Any] | None = None, **_: Any + ) -> str: + return super()._to_sqlglot(expr, params=params) def execute(self, expr: ir.Expr, **kwargs: Any) -> Any: """Execute an expression.""" diff --git a/ibis/backends/impala/compiler.py b/ibis/backends/impala/compiler.py index bf4b1cb4d67c..8b0285a5dae6 100644 --- a/ibis/backends/impala/compiler.py +++ b/ibis/backends/impala/compiler.py @@ -75,8 +75,6 @@ class ImpalaCompiler(SQLGlotCompiler): ops.Hash: "fnv_hash", ops.LStrip: "ltrim", ops.Ln: "ln", - ops.Log10: "log10", - ops.Log2: "log2", ops.RandomUUID: "uuid", ops.RStrip: "rtrim", ops.Strip: "trim", @@ -114,6 +112,12 @@ def _minimize_spec(start, end, spec): return None return spec + def visit_Log2(self, op, *, arg): + return self.f.anon.log2(arg) + + def visit_Log10(self, op, *, arg): + return self.f.anon.log10(arg) + def visit_Literal(self, op, *, value, dtype): if value is None and dtype.is_binary(): return NULL diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens/notnull/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens/notnull/out.sql index e34061596c65..42f541c42c37 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens/notnull/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_is_parens/notnull/out.sql @@ -3,4 +3,4 @@ SELECT `t0`.`b` FROM `table` AS `t0` WHERE - NOT `t0`.`a` IS NULL = NOT `t0`.`b` IS NULL \ No newline at end of file + `t0`.`a` IS NOT NULL = `t0`.`b` IS NOT NULL \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_sql/test_logically_negate_complex_boolean_expr/out.sql b/ibis/backends/impala/tests/snapshots/test_sql/test_logically_negate_complex_boolean_expr/out.sql index 8d93c3148789..f0be225bba22 100644 --- a/ibis/backends/impala/tests/snapshots/test_sql/test_logically_negate_complex_boolean_expr/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_sql/test_logically_negate_complex_boolean_expr/out.sql @@ -1,5 +1,5 @@ SELECT NOT ( - `t0`.`a` IN ('foo') AND NOT `t0`.`c` IS NULL + `t0`.`a` IN ('foo') AND `t0`.`c` IS NOT NULL ) AS `tmp` FROM `t` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/notnull/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/notnull/out.sql index e73b0f80ce14..c1da13197712 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/notnull/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_isnull_notnull/notnull/out.sql @@ -1,3 +1,3 @@ SELECT - NOT `t0`.`a` IS NULL AS `NotNull(a)` + `t0`.`a` IS NOT NULL AS `NotNull(a)` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/default/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/default/out.sql index 3028c41616d0..148dbb089055 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/default/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/default/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(FROM_UNIXTIME(CAST(`t0`.`c` AS INT)) AS TIMESTAMP) AS `TimestampFromUNIX(c, SECOND)` + CAST(FROM_UNIXTIME(CAST(`t0`.`c` AS INT), 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP) AS `TimestampFromUNIX(c, SECOND)` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/ms/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/ms/out.sql index bdd164f40ce0..d7a8e14d6b70 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/ms/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/ms/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(FROM_UNIXTIME(CAST(CAST(`t0`.`c` / 1000 AS INT) AS INT)) AS TIMESTAMP) AS `TimestampFromUNIX(c, MILLISECOND)` + CAST(FROM_UNIXTIME(CAST(CAST(`t0`.`c` / 1000 AS INT) AS INT), 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP) AS `TimestampFromUNIX(c, MILLISECOND)` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/us/out.sql b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/us/out.sql index acbcbddc9cbb..3eb9e90985d3 100644 --- a/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/us/out.sql +++ b/ibis/backends/impala/tests/snapshots/test_value_exprs/test_timestamp_from_integer/us/out.sql @@ -1,3 +1,3 @@ SELECT - CAST(FROM_UNIXTIME(CAST(CAST(`t0`.`c` / 1000000 AS INT) AS INT)) AS TIMESTAMP) AS `TimestampFromUNIX(c, MICROSECOND)` + CAST(FROM_UNIXTIME(CAST(CAST(`t0`.`c` / 1000000 AS INT) AS INT), 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP) AS `TimestampFromUNIX(c, MICROSECOND)` FROM `alltypes` AS `t0` \ No newline at end of file diff --git a/ibis/backends/snowflake/compiler.py b/ibis/backends/snowflake/compiler.py index 77e870ec4e5c..4437e21df16e 100644 --- a/ibis/backends/snowflake/compiler.py +++ b/ibis/backends/snowflake/compiler.py @@ -324,7 +324,11 @@ def visit_RegexExtract(self, op, *, arg, pattern, index): ) def visit_ArrayZip(self, op, *, arg): - return self.f.udf.array_zip(self.f.array(*arg)) + return self.if_( + sg.not_(sg.or_(*(arr.is_(NULL) for arr in arg))), + self.f.udf.array_zip(self.f.array(*arg)), + NULL, + ) def visit_DayOfWeekName(self, op, *, arg): return sge.Case( diff --git a/ibis/backends/sql/__init__.py b/ibis/backends/sql/__init__.py index e160665da24f..8d100f33e93a 100644 --- a/ibis/backends/sql/__init__.py +++ b/ibis/backends/sql/__init__.py @@ -120,9 +120,6 @@ def compile( self._log(sql) return sql - def _to_sql(self, expr: ir.Expr, **kwargs) -> str: - return self.compile(expr, **kwargs) - def _log(self, sql: str) -> None: """Log `sql`. diff --git a/ibis/backends/sql/compiler.py b/ibis/backends/sql/compiler.py index e32ff028cfc8..a929e2afc908 100644 --- a/ibis/backends/sql/compiler.py +++ b/ibis/backends/sql/compiler.py @@ -925,7 +925,7 @@ def visit_ExistsSubquery(self, op, *, rel): return self.f.exists(select) def visit_InSubquery(self, op, *, rel, needle): - return needle.isin(rel.this) + return needle.isin(query=rel.this) def visit_Array(self, op, *, exprs): return self.f.array(*exprs) diff --git a/ibis/backends/sqlite/compiler.py b/ibis/backends/sqlite/compiler.py index fc277b2a44d0..cc938ed7212d 100644 --- a/ibis/backends/sqlite/compiler.py +++ b/ibis/backends/sqlite/compiler.py @@ -1,5 +1,7 @@ from __future__ import annotations +import math + import sqlglot as sg import sqlglot.expressions as sge from public import public @@ -87,7 +89,6 @@ class SQLiteCompiler(SQLGlotCompiler): ops.BitwiseXor: "_ibis_xor", ops.BitwiseNot: "_ibis_inv", ops.Modulus: "mod", - ops.Log10: "log10", ops.TypeOf: "typeof", ops.BitOr: "_ibis_bit_or", ops.BitAnd: "_ibis_bit_and", @@ -106,6 +107,18 @@ def _aggregate(self, funcname: str, *args, where): return sge.Filter(this=expr, expression=sge.Where(this=where)) return expr + def visit_Log10(self, op, *, arg): + return self.f.anon.log10(arg) + + def visit_Log2(self, op, *, arg): + return self.f.anon.log2(arg) + + def visit_Log(self, op, *, arg, base): + func = self.f.anon.log + if base is None: + base = math.e + return func(base, arg) + def visit_Cast(self, op, *, arg, to) -> sge.Cast: if to.is_timestamp(): if to.timezone not in (None, "UTC"): diff --git a/ibis/backends/sqlite/udf.py b/ibis/backends/sqlite/udf.py index 720e64e7458c..ee47ff704c24 100644 --- a/ibis/backends/sqlite/udf.py +++ b/ibis/backends/sqlite/udf.py @@ -124,18 +124,29 @@ def ln(arg): return math.log(arg) +@udf(skip_if_exists=True) +def log(base, arg): + """Return the logarithm of `arg` in the given `base`. + + The argument order matches the builtin sqlite function. + """ + if arg < 0: + return None + return math.log(arg, base) + + @udf(skip_if_exists=True) def log2(arg): if arg < 0: return None - return math.log(arg, 2) + return math.log2(arg) @udf(skip_if_exists=True) def log10(arg): if arg < 0: return None - return math.log(arg, 10) + return math.log10(arg) @udf(skip_if_exists=True) diff --git a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mssql/out.sql b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mssql/out.sql index a5269c3f57b9..e26477ba87a3 100644 --- a/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mssql/out.sql +++ b/ibis/backends/tests/snapshots/test_generic/test_many_subqueries/mssql/out.sql @@ -1,20 +1,20 @@ WITH [t1] AS ( SELECT - [t0].[street] AS [street], + [t0].[street], ROW_NUMBER() OVER (ORDER BY CASE WHEN [t0].[street] IS NULL THEN 1 ELSE 0 END, [t0].[street] ASC) - 1 AS [key] FROM [data] AS [t0] ), [t7] AS ( SELECT - [t6].[street] AS [street], + [t6].[street], ROW_NUMBER() OVER (ORDER BY CASE WHEN [t6].[street] IS NULL THEN 1 ELSE 0 END, [t6].[street] ASC) - 1 AS [key] FROM ( SELECT - [t3].[street] AS [street], - [t3].[key] AS [key] + [t3].[street], + [t3].[key] FROM [t1] AS [t3] INNER JOIN ( SELECT - [t2].[key] AS [key] + [t2].[key] FROM [t1] AS [t2] ) AS [t5] ON [t3].[key] = [t5].[key] @@ -26,7 +26,7 @@ SELECT FROM [t7] AS [t9] INNER JOIN ( SELECT - [t8].[key] AS [key] + [t8].[key] FROM [t7] AS [t8] ) AS [t11] ON [t9].[key] = [t11].[key] \ No newline at end of file diff --git a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mssql/out.sql b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mssql/out.sql index 217eafe26f55..7a46232efae6 100644 --- a/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mssql/out.sql +++ b/ibis/backends/tests/snapshots/test_sql/test_cte_refs_in_topo_order/mssql/out.sql @@ -1,11 +1,9 @@ WITH [t1] AS ( SELECT - [t0].[key] AS [key] + [t0].[key] FROM [leaf] AS [t0] WHERE - ( - 1 = 1 - ) + (1 = 1) ) SELECT [t3].[key] @@ -14,7 +12,7 @@ INNER JOIN [t1] AS [t4] ON [t3].[key] = [t4].[key] INNER JOIN ( SELECT - [t3].[key] AS [key] + [t3].[key] FROM [t1] AS [t3] INNER JOIN [t1] AS [t4] ON [t3].[key] = [t4].[key] diff --git a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql index 49071225a5db..02be8512408b 100644 --- a/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_select_sql/test_bug_duplicated_where/out.sql @@ -17,7 +17,7 @@ FROM ( ) AS "t1" ) AS "t2" WHERE - NOT "t2"."dev" IS NULL + "t2"."dev" IS NOT NULL ORDER BY "t2"."dev" DESC LIMIT 10 \ No newline at end of file diff --git a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql index 567d20198fcb..ca3c22e53e49 100644 --- a/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql +++ b/ibis/backends/tests/sql/snapshots/test_sql/test_isnull_notnull/notnull/out.sql @@ -1,3 +1,3 @@ SELECT - NOT "t0"."double_col" IS NULL AS "tmp" + "t0"."double_col" IS NOT NULL AS "tmp" FROM "functional_alltypes" AS "t0" \ No newline at end of file diff --git a/ibis/backends/tests/sql/test_select_sql.py b/ibis/backends/tests/sql/test_select_sql.py index 2ffaad9120f2..94a52017f763 100644 --- a/ibis/backends/tests/sql/test_select_sql.py +++ b/ibis/backends/tests/sql/test_select_sql.py @@ -47,7 +47,7 @@ def test_select_sql(alltypes, star1, expr_fn, snapshot): assert_decompile_roundtrip(expr, snapshot) -def test_nameless_table(snapshot): +def test_nameless_table(): # Generate a unique table name when we haven't passed on nameless = ibis.table([("key", "string")]) assert nameless.op().name is not None diff --git a/ibis/backends/tests/test_array.py b/ibis/backends/tests/test_array.py index 0f022b615121..c9768eff3d72 100644 --- a/ibis/backends/tests/test_array.py +++ b/ibis/backends/tests/test_array.py @@ -861,13 +861,24 @@ def test_zip(backend): raises=ClickHouseDatabaseError, reason="clickhouse nested types can't be null", ) -def test_zip_null(backend): - # the .map is workaround for https://github.com/ibis-project/ibis/issues/8641 - a = ibis.literal([1, 2, 3], type="array").map(ibis._) +@pytest.mark.never( + "bigquery", + raises=AssertionError, + reason="BigQuery converts NULLs with array type to an empty array", +) +@pytest.mark.parametrize( + "fn", + [ + param(lambda a, b: a.zip(b), id="non-null-zip-null"), + param(lambda a, b: b.zip(a), id="null-zip-non-null"), + param(lambda _, b: b.zip(b), id="null-zip-null"), + ], +) +def test_zip_null(con, fn): + a = ibis.literal([1, 2, 3], type="array") b = ibis.literal(None, type="array") - assert backend.connection.execute(a.zip(b)) is None - assert backend.connection.execute(b.zip(a)) is None - assert backend.connection.execute(b.zip(b)) is None + expr = fn(a, b) + assert con.execute(expr) is None @builtin_array diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index b8e986c112db..fceababc49da 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1532,6 +1532,11 @@ def test_try_cast_func(con, from_val, to_type, func): reason="doesn't support OFFSET without ORDER BY", ), pytest.mark.notyet(["oracle"], raises=com.UnsupportedArgumentError), + pytest.mark.never( + ["mssql"], + raises=PyODBCProgrammingError, + reason="sqlglot generates code that requires > 0 fetch rows", + ), ], ), param( @@ -1544,11 +1549,6 @@ def test_try_cast_func(con, from_val, to_type, func): raises=GoogleBadRequest, reason="bigquery doesn't support OFFSET without LIMIT", ), - pytest.mark.notyet( - ["mssql"], - raises=PyODBCProgrammingError, - reason="mssql doesn't support OFFSET without LIMIT", - ), pytest.mark.notyet(["exasol"], raises=ExaQueryError), pytest.mark.never( ["impala"], @@ -1575,6 +1575,11 @@ def test_try_cast_func(con, from_val, to_type, func): reason="doesn't support OFFSET without ORDER BY", ), pytest.mark.notyet(["oracle"], raises=com.UnsupportedArgumentError), + pytest.mark.never( + ["mssql"], + raises=PyODBCProgrammingError, + reason="sqlglot generates code that requires > 0 fetch rows", + ), ], ), param( @@ -1618,7 +1623,18 @@ def test_static_table_slice(backend, slc, expected_count_fn): param(slice(-3, -2), lambda _: 1, id="[-3:-2]"), # positive stop param(slice(-4000, 7000), lambda _: 3700, id="[-4000:7000]"), - param(slice(-3, 2), lambda _: 0, id="[-3:2]"), + param( + slice(-3, 2), + lambda _: 0, + id="[-3:2]", + marks=[ + pytest.mark.never( + ["mssql"], + raises=PyODBCProgrammingError, + reason="sqlglot generates code that requires > 0 fetch rows", + ), + ], + ), ################## ### POSITIVE start # negative stop @@ -1735,7 +1751,7 @@ def test_dynamic_table_slice(backend, slc, expected_count_fn): @pytest.mark.notyet( ["mssql"], reason="doesn't support dynamic limit/offset; compiles incorrectly in sqlglot", - raises=AssertionError, + raises=PyODBCProgrammingError, ) @pytest.mark.notimpl( ["risingwave"], diff --git a/ibis/backends/tests/test_numeric.py b/ibis/backends/tests/test_numeric.py index d8a29f9cadc0..47629e90df68 100644 --- a/ibis/backends/tests/test_numeric.py +++ b/ibis/backends/tests/test_numeric.py @@ -943,6 +943,19 @@ def test_simple_math_functions_columns( reason="function log10(numeric, numeric) does not exist", ), ], + id="log2-explicit", + ), + param( + lambda t: t.double_col.add(1).log2(), + lambda t: np.log2(t.double_col + 1), + marks=[ + pytest.mark.notimpl(["druid"], raises=PyDruidProgrammingError), + pytest.mark.notimpl( + ["risingwave"], + raises=PsycoPg2InternalError, + reason="function log10(numeric, numeric) does not exist", + ), + ], id="log2", ), param( diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 1c1917e299cd..8870d0a7197c 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -1140,11 +1140,11 @@ def test_timestamp_comparison_filter(backend, con, alltypes, df, func_name): raises=TypeError, reason="Invalid comparison between dtype=datetime64[ns, UTC] and datetime", ), - pytest.mark.never( - ["duckdb"], + pytest.mark.xfail_version( + duckdb=["duckdb>=0.10"], raises=DuckDBBinderException, # perhaps we should consider disallowing this in ibis as well - reason="DuckDB doesn't allow comparing timestamp with and without timezones", + reason="DuckDB doesn't allow comparing timestamp with and without timezones starting at version 0.10", ), ] diff --git a/ibis/examples/tests/test_examples.py b/ibis/examples/tests/test_examples.py index c9d0b9567dc4..40d1e2bf4afe 100644 --- a/ibis/examples/tests/test_examples.py +++ b/ibis/examples/tests/test_examples.py @@ -11,6 +11,7 @@ pytestmark = pytest.mark.examples duckdb = pytest.importorskip("duckdb") +pytest.importorskip("pins") # large files or files that are used elsewhere ignored = frozenset( diff --git a/ibis/expr/sql.py b/ibis/expr/sql.py index fcc63401415f..da35a0a26a75 100644 --- a/ibis/expr/sql.py +++ b/ibis/expr/sql.py @@ -371,18 +371,17 @@ def to_sql( # default to duckdb for SQL compilation because it supports the # widest array of ibis features for SQL backends backend = ibis.duckdb - read = "duckdb" - write = ibis.options.sql.default_dialect + dialect = ibis.options.sql.default_dialect else: - read = write = backend.dialect + dialect = backend.dialect else: try: backend = getattr(ibis, dialect) except AttributeError: raise ValueError(f"Unknown dialect {dialect}") else: - read = write = getattr(backend, "dialect", dialect) + dialect = getattr(backend, "dialect", dialect) - sql = backend._to_sql(expr.unbind(), **kwargs) - (transpiled,) = sg.transpile(sql, read=read, write=write, pretty=pretty) - return SQLString(transpiled) + sg_expr = backend._to_sqlglot(expr.unbind(), **kwargs) + sql = sg_expr.sql(dialect=dialect, pretty=pretty) + return SQLString(sql) diff --git a/ibis/tests/expr/mocks.py b/ibis/tests/expr/mocks.py index 8a4e81322750..fb4b79b35b78 100644 --- a/ibis/tests/expr/mocks.py +++ b/ibis/tests/expr/mocks.py @@ -53,10 +53,10 @@ def list_tables(self): def list_databases(self): return ["mockdb"] - def _to_sql(self, expr, **kwargs): + def _to_sqlglot(self, expr, **kwargs): import ibis - return ibis.to_sql(expr, dialect="duckdb", **kwargs) + return ibis.duckdb._to_sqlglot(expr, **kwargs) def fetch_from_cursor(self, cursor, schema): pass diff --git a/ibis/tests/util.py b/ibis/tests/util.py index 47df8e59ebf9..8e3f3ee8e094 100644 --- a/ibis/tests/util.py +++ b/ibis/tests/util.py @@ -5,6 +5,8 @@ import pickle from typing import Callable +import pytest + import ibis import ibis.expr.types as ir from ibis import util @@ -55,6 +57,8 @@ def assert_decompile_roundtrip( operations from table.view() calls, or other relations whose equality is difficult to roundtrip. """ + pytest.importorskip("black") + rendered = ibis.decompile(expr, format=True) if snapshot is not None: snapshot.assert_match(rendered, "decompiled.py") diff --git a/poetry.lock b/poetry.lock index 03a43d9d102e..72e803a2f1d3 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6381,13 +6381,13 @@ sqlcipher = ["sqlcipher3_binary"] [[package]] name = "sqlglot" -version = "22.4.0" +version = "22.5.0" description = "An easily customizable SQL parser and transpiler" optional = false python-versions = ">=3.7" files = [ - {file = "sqlglot-22.4.0-py3-none-any.whl", hash = "sha256:9f5fb0e7c35b24bf2b30976b248f28bdfd8187d533a49ba15368aa0135837cf8"}, - {file = "sqlglot-22.4.0.tar.gz", hash = "sha256:959b93e0ea31fbd8ae51e38bb9f395d2cb1bc97cd39ecd1972135a70cb5b1815"}, + {file = "sqlglot-22.5.0-py3-none-any.whl", hash = "sha256:ef11f7e56e93732aca3caab3c74a6c11489e383a43c2ac5b5f86cc85517ef9f3"}, + {file = "sqlglot-22.5.0.tar.gz", hash = "sha256:27c7850298b62741d78f99f17904014cc6fa9bebb2fd29fb2e9a0a195ec2a523"}, ] [package.extras] @@ -7332,4 +7332,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "f21a96d782004797cbd6e9b574a08e9544da84d8cf0afcb99dd79d2485cd8dac" +content-hash = "a18f00aab7bf153e449833bf5acd4db3aa439a9066d1ad3d63c20729eb6714d6" diff --git a/pyproject.toml b/pyproject.toml index c7ae66e2dfb7..0b2330fb7957 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ pyarrow-hotfix = ">=0.4,<1" python-dateutil = ">=2.8.2,<3" pytz = ">=2022.7" rich = ">=12.4.4,<14" -sqlglot = ">=22.4,<22.6" +sqlglot = ">=22.5,<22.6" toolz = ">=0.11,<1" typing-extensions = ">=4.3.0,<5" black = { version = ">=22.1.0,<25", optional = true } diff --git a/requirements-dev.txt b/requirements-dev.txt index 5a09c06f0d29..de0b3c5beb8b 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -235,7 +235,7 @@ sortedcontainers==2.4.0 ; python_version >= "3.9" and python_version < "4.0" soupsieve==2.5 ; python_version >= "3.10" and python_version < "3.13" sphobjinv==2.3.1 ; python_version >= "3.10" and python_version < "3.13" sqlalchemy==2.0.28 ; python_version >= "3.9" and python_version < "4.0" -sqlglot==22.4.0 ; python_version >= "3.9" and python_version < "4.0" +sqlglot==22.5.0 ; python_version >= "3.9" and python_version < "4.0" stack-data==0.6.3 ; python_version >= "3.9" and python_version < "4.0" statsmodels==0.14.1 ; python_version >= "3.10" and python_version < "3.13" stdlib-list==0.10.0 ; python_version >= "3.9" and python_version < "4.0"