Skip to content

Commit

Permalink
fix(deps): bump sqlglot to pick up duckdb array fixes (#8682)
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud authored Mar 18, 2024
1 parent 6e047b0 commit a3bd853
Show file tree
Hide file tree
Showing 36 changed files with 159 additions and 74 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/ibis-backends.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,18 @@ jobs:
extras:
- duckdb
- deltalake
- geospatial
- examples
- decompiler
- polars
additional_deps:
- torch
# TODO: remove this duckdb job once the next duckdb_spatial is released
- name: duckdb
title: DuckDB + Geospatial
extras:
- geospatial
additional_deps:
- "duckdb==0.9.2"
- name: clickhouse
title: ClickHouse
services:
Expand Down
4 changes: 2 additions & 2 deletions ibis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def __getattr__(name: str) -> BaseBackend:
# - has_operation
# - add_operation
# - _from_url
# - _to_sql
# - _to_sqlglot
#
# We also copy over the docstring from `do_connect` to the proxy `connect`
# method, since that's where all the backend-specific kwargs are currently
Expand All @@ -119,7 +119,7 @@ def connect(*args, **kwargs):
proxy.add_operation = backend.add_operation
proxy.name = name
proxy._from_url = backend._from_url
proxy._to_sql = backend._to_sql
proxy._to_sqlglot = backend._to_sqlglot
# Add any additional methods that should be exposed at the top level
for name in getattr(backend, "_top_level_methods", ()):
setattr(proxy, name, getattr(backend, name))
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -926,8 +926,8 @@ def compile(
"""Compile an expression."""
return self.compiler.to_sql(expr, params=params)

def _to_sql(self, expr: ir.Expr, **kwargs) -> str:
"""Convert an expression to a SQL string.
def _to_sqlglot(self, expr: ir.Expr, **kwargs) -> sg.exp.Expression:
"""Convert an Ibis expression to a sqlglot expression.
Called by `ibis.to_sql`; gives the backend an opportunity to generate
nicer SQL for human consumption.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
SELECT
"t0"."id" IN (SELECT
arrayJoin("t1"."ids") AS "ids"
FROM "way_view" AS "t1") AS "InSubquery(id)"
"t0"."id" IN (
SELECT
arrayJoin("t1"."ids") AS "ids"
FROM "way_view" AS "t1"
) AS "InSubquery(id)"
FROM "node_view" AS "t0"
4 changes: 3 additions & 1 deletion ibis/backends/druid/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ class DruidCompiler(SQLGlotCompiler):
ops.BitwiseRightShift: "bitwise_shift_right",
ops.Modulus: "mod",
ops.Power: "power",
ops.Log10: "log10",
ops.ApproxCountDistinct: "approx_count_distinct",
ops.StringContains: "contains_string",
}
Expand All @@ -99,6 +98,9 @@ def _aggregate(self, funcname: str, *args, where):
return sg.exp.Filter(this=expr, expression=sg.exp.Where(this=where))
return expr

def visit_Log10(self, op, *, arg):
return self.f.anon.log10(arg)

def visit_Sum(self, op, *, arg, where):
arg = self.if_(arg, 1, 0) if op.arg.dtype.is_boolean() else arg
return self.agg.sum(arg, where=where)
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1371,7 +1371,7 @@ def to_parquet(
"""
self._run_pre_execute_hooks(expr)
query = self._to_sql(expr, params=params)
query = self.compile(expr, params=params)
args = ["FORMAT 'parquet'", *(f"{k.upper()} {v!r}" for k, v in kwargs.items())]
copy_cmd = f"COPY ({query}) TO {str(path)!r} ({', '.join(args)})"
with self._safe_raw_sql(copy_cmd):
Expand Down Expand Up @@ -1407,7 +1407,7 @@ def to_csv(
"""
self._run_pre_execute_hooks(expr)
query = self._to_sql(expr, params=params)
query = self.compile(expr, params=params)
args = [
"FORMAT 'csv'",
f"HEADER {int(header)}",
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/duckdb/tests/test_geospatial.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,8 @@ def test_literal_geospatial_inferred(con, shp, expected, snapshot):
reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
)
def test_load_geo_example(con):
pytest.importorskip("pins")

t = ibis.examples.zones.fetch(backend=con)
assert t.geom.type().is_geospatial()

Expand Down
6 changes: 4 additions & 2 deletions ibis/backends/flink/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,10 @@ def compile(
"""Compile an Ibis expression to Flink."""
return super().compile(expr, params=params) # Discard `limit` and other kwargs.

def _to_sql(self, expr: ir.Expr, **kwargs: Any) -> str:
return str(self.compile(expr, **kwargs))
def _to_sqlglot(
self, expr: ir.Expr, params: Mapping[ir.Expr, Any] | None = None, **_: Any
) -> str:
return super()._to_sqlglot(expr, params=params)

def execute(self, expr: ir.Expr, **kwargs: Any) -> Any:
"""Execute an expression."""
Expand Down
8 changes: 6 additions & 2 deletions ibis/backends/impala/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,6 @@ class ImpalaCompiler(SQLGlotCompiler):
ops.Hash: "fnv_hash",
ops.LStrip: "ltrim",
ops.Ln: "ln",
ops.Log10: "log10",
ops.Log2: "log2",
ops.RandomUUID: "uuid",
ops.RStrip: "rtrim",
ops.Strip: "trim",
Expand Down Expand Up @@ -114,6 +112,12 @@ def _minimize_spec(start, end, spec):
return None
return spec

def visit_Log2(self, op, *, arg):
return self.f.anon.log2(arg)

def visit_Log10(self, op, *, arg):
return self.f.anon.log10(arg)

def visit_Literal(self, op, *, value, dtype):
if value is None and dtype.is_binary():
return NULL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ SELECT
`t0`.`b`
FROM `table` AS `t0`
WHERE
NOT `t0`.`a` IS NULL = NOT `t0`.`b` IS NULL
`t0`.`a` IS NOT NULL = `t0`.`b` IS NOT NULL
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SELECT
NOT (
`t0`.`a` IN ('foo') AND NOT `t0`.`c` IS NULL
`t0`.`a` IN ('foo') AND `t0`.`c` IS NOT NULL
) AS `tmp`
FROM `t` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
NOT `t0`.`a` IS NULL AS `NotNull(a)`
`t0`.`a` IS NOT NULL AS `NotNull(a)`
FROM `alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
CAST(FROM_UNIXTIME(CAST(`t0`.`c` AS INT)) AS TIMESTAMP) AS `TimestampFromUNIX(c, SECOND)`
CAST(FROM_UNIXTIME(CAST(`t0`.`c` AS INT), 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP) AS `TimestampFromUNIX(c, SECOND)`
FROM `alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
CAST(FROM_UNIXTIME(CAST(CAST(`t0`.`c` / 1000 AS INT) AS INT)) AS TIMESTAMP) AS `TimestampFromUNIX(c, MILLISECOND)`
CAST(FROM_UNIXTIME(CAST(CAST(`t0`.`c` / 1000 AS INT) AS INT), 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP) AS `TimestampFromUNIX(c, MILLISECOND)`
FROM `alltypes` AS `t0`
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
CAST(FROM_UNIXTIME(CAST(CAST(`t0`.`c` / 1000000 AS INT) AS INT)) AS TIMESTAMP) AS `TimestampFromUNIX(c, MICROSECOND)`
CAST(FROM_UNIXTIME(CAST(CAST(`t0`.`c` / 1000000 AS INT) AS INT), 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP) AS `TimestampFromUNIX(c, MICROSECOND)`
FROM `alltypes` AS `t0`
6 changes: 5 additions & 1 deletion ibis/backends/snowflake/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,11 @@ def visit_RegexExtract(self, op, *, arg, pattern, index):
)

def visit_ArrayZip(self, op, *, arg):
return self.f.udf.array_zip(self.f.array(*arg))
return self.if_(
sg.not_(sg.or_(*(arr.is_(NULL) for arr in arg))),
self.f.udf.array_zip(self.f.array(*arg)),
NULL,
)

def visit_DayOfWeekName(self, op, *, arg):
return sge.Case(
Expand Down
3 changes: 0 additions & 3 deletions ibis/backends/sql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,6 @@ def compile(
self._log(sql)
return sql

def _to_sql(self, expr: ir.Expr, **kwargs) -> str:
return self.compile(expr, **kwargs)

def _log(self, sql: str) -> None:
"""Log `sql`.
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/sql/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -925,7 +925,7 @@ def visit_ExistsSubquery(self, op, *, rel):
return self.f.exists(select)

def visit_InSubquery(self, op, *, rel, needle):
return needle.isin(rel.this)
return needle.isin(query=rel.this)

def visit_Array(self, op, *, exprs):
return self.f.array(*exprs)
Expand Down
15 changes: 14 additions & 1 deletion ibis/backends/sqlite/compiler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import math

import sqlglot as sg
import sqlglot.expressions as sge
from public import public
Expand Down Expand Up @@ -87,7 +89,6 @@ class SQLiteCompiler(SQLGlotCompiler):
ops.BitwiseXor: "_ibis_xor",
ops.BitwiseNot: "_ibis_inv",
ops.Modulus: "mod",
ops.Log10: "log10",
ops.TypeOf: "typeof",
ops.BitOr: "_ibis_bit_or",
ops.BitAnd: "_ibis_bit_and",
Expand All @@ -106,6 +107,18 @@ def _aggregate(self, funcname: str, *args, where):
return sge.Filter(this=expr, expression=sge.Where(this=where))
return expr

def visit_Log10(self, op, *, arg):
return self.f.anon.log10(arg)

def visit_Log2(self, op, *, arg):
return self.f.anon.log2(arg)

def visit_Log(self, op, *, arg, base):
func = self.f.anon.log
if base is None:
base = math.e
return func(base, arg)

def visit_Cast(self, op, *, arg, to) -> sge.Cast:
if to.is_timestamp():
if to.timezone not in (None, "UTC"):
Expand Down
15 changes: 13 additions & 2 deletions ibis/backends/sqlite/udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,18 +124,29 @@ def ln(arg):
return math.log(arg)


@udf(skip_if_exists=True)
def log(base, arg):
    """Return the logarithm of `arg` in the given `base`.

    The argument order matches the builtin sqlite function.

    Returns None instead of raising when the logarithm is undefined:
    `arg` is not positive, `base` is not positive, or `base` is 1.
    """
    # math.log raises ValueError for non-positive inputs (including 0) and
    # ZeroDivisionError for base 1; map all of those to None.
    if arg <= 0 or base <= 0 or base == 1:
        return None
    return math.log(arg, base)


@udf(skip_if_exists=True)
def log2(arg):
if arg < 0:
return None
return math.log(arg, 2)
return math.log2(arg)


@udf(skip_if_exists=True)
def log10(arg):
if arg < 0:
return None
return math.log(arg, 10)
return math.log10(arg)


@udf(skip_if_exists=True)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
WITH [t1] AS (
SELECT
[t0].[street] AS [street],
[t0].[street],
ROW_NUMBER() OVER (ORDER BY CASE WHEN [t0].[street] IS NULL THEN 1 ELSE 0 END, [t0].[street] ASC) - 1 AS [key]
FROM [data] AS [t0]
), [t7] AS (
SELECT
[t6].[street] AS [street],
[t6].[street],
ROW_NUMBER() OVER (ORDER BY CASE WHEN [t6].[street] IS NULL THEN 1 ELSE 0 END, [t6].[street] ASC) - 1 AS [key]
FROM (
SELECT
[t3].[street] AS [street],
[t3].[key] AS [key]
[t3].[street],
[t3].[key]
FROM [t1] AS [t3]
INNER JOIN (
SELECT
[t2].[key] AS [key]
[t2].[key]
FROM [t1] AS [t2]
) AS [t5]
ON [t3].[key] = [t5].[key]
Expand All @@ -26,7 +26,7 @@ SELECT
FROM [t7] AS [t9]
INNER JOIN (
SELECT
[t8].[key] AS [key]
[t8].[key]
FROM [t7] AS [t8]
) AS [t11]
ON [t9].[key] = [t11].[key]
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
WITH [t1] AS (
SELECT
[t0].[key] AS [key]
[t0].[key]
FROM [leaf] AS [t0]
WHERE
(
1 = 1
)
(1 = 1)
)
SELECT
[t3].[key]
Expand All @@ -14,7 +12,7 @@ INNER JOIN [t1] AS [t4]
ON [t3].[key] = [t4].[key]
INNER JOIN (
SELECT
[t3].[key] AS [key]
[t3].[key]
FROM [t1] AS [t3]
INNER JOIN [t1] AS [t4]
ON [t3].[key] = [t4].[key]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ FROM (
) AS "t1"
) AS "t2"
WHERE
NOT "t2"."dev" IS NULL
"t2"."dev" IS NOT NULL
ORDER BY
"t2"."dev" DESC
LIMIT 10
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SELECT
NOT "t0"."double_col" IS NULL AS "tmp"
"t0"."double_col" IS NOT NULL AS "tmp"
FROM "functional_alltypes" AS "t0"
2 changes: 1 addition & 1 deletion ibis/backends/tests/sql/test_select_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_select_sql(alltypes, star1, expr_fn, snapshot):
assert_decompile_roundtrip(expr, snapshot)


def test_nameless_table(snapshot):
def test_nameless_table():
# Generate a unique table name when we haven't passed one
nameless = ibis.table([("key", "string")])
assert nameless.op().name is not None
Expand Down
23 changes: 17 additions & 6 deletions ibis/backends/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,13 +861,24 @@ def test_zip(backend):
raises=ClickHouseDatabaseError,
reason="clickhouse nested types can't be null",
)
def test_zip_null(backend):
# the .map is workaround for https://github.com/ibis-project/ibis/issues/8641
a = ibis.literal([1, 2, 3], type="array<int64>").map(ibis._)
@pytest.mark.never(
"bigquery",
raises=AssertionError,
reason="BigQuery converts NULLs with array type to an empty array",
)
@pytest.mark.parametrize(
"fn",
[
param(lambda a, b: a.zip(b), id="non-null-zip-null"),
param(lambda a, b: b.zip(a), id="null-zip-non-null"),
param(lambda _, b: b.zip(b), id="null-zip-null"),
],
)
def test_zip_null(con, fn):
a = ibis.literal([1, 2, 3], type="array<int64>")
b = ibis.literal(None, type="array<int64>")
assert backend.connection.execute(a.zip(b)) is None
assert backend.connection.execute(b.zip(a)) is None
assert backend.connection.execute(b.zip(b)) is None
expr = fn(a, b)
assert con.execute(expr) is None


@builtin_array
Expand Down
Loading

0 comments on commit a3bd853

Please sign in to comment.