From 21384c3da92708760f7049beb1ae2258679e2fd1 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 22 Feb 2024 11:25:12 -0500 Subject: [PATCH 1/7] chore: resurrect `SupportsSchema` type; clean up unused ignored imports (#8421) --- ibis/common/typing.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/ibis/common/typing.py b/ibis/common/typing.py index 255939c96aad8..5d4bca1d16788 100644 --- a/ibis/common/typing.py +++ b/ibis/common/typing.py @@ -4,25 +4,27 @@ import sys from abc import abstractmethod from itertools import zip_longest -from types import ModuleType # noqa: F401 -from typing import ( - TYPE_CHECKING, - Any, - Generic, # noqa: F401 - Optional, - TypeVar, - Union, - get_args, - get_origin, -) +from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union, get_args, get_origin from typing import get_type_hints as _get_type_hints from ibis.common.bases import Abstract from ibis.common.caching import memoize if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + from typing_extensions import Self + import ibis.expr.datatypes as dt + import ibis.expr.schema as sch + + SupportsSchema = TypeVar( + "SupportsSchema", + sch.Schema, + Mapping[str, str | dt.DataType], + Iterable[tuple[str, str | dt.DataType]], + ) + if sys.version_info >= (3, 10): from types import UnionType from typing import TypeAlias @@ -144,6 +146,7 @@ def get_bound_typevars(obj: Any) -> dict[TypeVar, tuple[str, type]]: Examples -------- + >>> from typing import Generic >>> class MyStruct(Generic[T, U]): ... a: T ... b: U From ad1f53a484d124b2a089c92f83c284a11e312cb5 Mon Sep 17 00:00:00 2001 From: Jim Crist-Harif Date: Thu, 22 Feb 2024 11:22:47 -0600 Subject: [PATCH 2/7] refactor: rename SupportSchema -> SchemaLike, fix type definition (#8427) --- ibis/backends/sql/__init__.py | 4 ++-- ibis/common/typing.py | 11 ----------- ibis/expr/api.py | 12 ++++++------ ibis/expr/schema.py | 10 +++++++++- ibis/expr/types/relations.py | 8 ++++---- 5 files changed, 21 insertions(+), 24 deletions(-) diff --git a/ibis/backends/sql/__init__.py b/ibis/backends/sql/__init__.py index 3d248014f7380..e201d4a8930a6 100644 --- a/ibis/backends/sql/__init__.py +++ b/ibis/backends/sql/__init__.py @@ -22,7 +22,7 @@ import ibis.expr.datatypes as dt from ibis.backends.sql.compiler import SQLGlotCompiler - from ibis.common.typing import SupportsSchema + from ibis.expr.schema import SchemaLike class SQLBackend(BaseBackend): @@ -137,7 +137,7 @@ def _log(self, sql: str) -> None: def sql( self, query: str, - schema: SupportsSchema | None = None, + schema: SchemaLike | None = None, dialect: str | None = None, ) -> ir.Table: query = self._transpile_sql(query, dialect=dialect) diff --git a/ibis/common/typing.py b/ibis/common/typing.py index 5d4bca1d16788..9c923243c1460 100644 --- a/ibis/common/typing.py +++ b/ibis/common/typing.py @@ -11,19 +11,8 @@ from ibis.common.caching import memoize if TYPE_CHECKING: - from collections.abc import Iterable, Mapping - from typing_extensions import Self - import ibis.expr.datatypes as dt - import ibis.expr.schema as sch - - SupportsSchema = TypeVar( - "SupportsSchema", - sch.Schema, - Mapping[str, str | dt.DataType], - Iterable[tuple[str, str | dt.DataType]], - ) if sys.version_info >= (3, 10): from types import UnionType diff --git a/ibis/expr/api.py b/ibis/expr/api.py index 5bdfb712d2730..9410737db0d59 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -48,7 +48,7 @@ import pandas as pd import pyarrow as pa - 
from ibis.common.typing import SupportsSchema + from ibis.expr.schema import SchemaLike __all__ = ( "aggregate", @@ -266,7 +266,7 @@ def param(type: dt.DataType) -> ir.Scalar: def schema( - pairs: SupportsSchema | None = None, + pairs: SchemaLike | None = None, names: Iterable[str] | None = None, types: Iterable[str | dt.DataType] | None = None, ) -> sch.Schema: @@ -310,7 +310,7 @@ def schema( def table( - schema: SupportsSchema | None = None, + schema: SchemaLike | None = None, name: str | None = None, ) -> ir.Table: """Create a table literal or an abstract table without data. @@ -352,7 +352,7 @@ def memtable( data, *, columns: Iterable[str] | None = None, - schema: SupportsSchema | None = None, + schema: SchemaLike | None = None, name: str | None = None, ) -> Table: """Construct an ibis table expression from in-memory data. @@ -441,7 +441,7 @@ def _memtable( data: pd.DataFrame | Any, *, columns: Iterable[str] | None = None, - schema: SupportsSchema | None = None, + schema: SchemaLike | None = None, name: str | None = None, ) -> Table: import pandas as pd @@ -493,7 +493,7 @@ def _memtable_from_pyarrow_table( data: pa.Table, *, name: str | None = None, - schema: SupportsSchema | None = None, + schema: SchemaLike | None = None, columns: Iterable[str] | None = None, ): from ibis.formats.pyarrow import PyArrowTableProxy diff --git a/ibis/expr/schema.py b/ibis/expr/schema.py index 96806c01fe5eb..d7a999507ae1f 100644 --- a/ibis/expr/schema.py +++ b/ibis/expr/schema.py @@ -1,7 +1,7 @@ from __future__ import annotations from collections.abc import Iterable, Iterator, Mapping -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Union import ibis.expr.datatypes as dt from ibis.common.annotations import attribute @@ -14,6 +14,7 @@ if TYPE_CHECKING: import pandas as pd + from typing_extensions import TypeAlias class Schema(Concrete, Coercible, MapSet): @@ -220,6 +221,13 @@ def apply_to(self, df: pd.DataFrame) -> pd.DataFrame: return PandasData.convert_table(df, self) +SchemaLike: TypeAlias = Union[ + Schema, + Mapping[str, Union[str, dt.DataType]], + Iterable[tuple[str, Union[str, dt.DataType]]], +] + + @lazy_singledispatch def schema(value: Any) -> Schema: """Construct ibis schema from schema-like python objects.""" diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index cc2640d21156a..9606ed757bd29 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -33,8 +33,8 @@ import ibis.expr.types as ir import ibis.selectors as s - from ibis.common.typing import SupportsSchema from ibis.expr.operations.relations import JoinKind + from ibis.expr.schema import SchemaLike from ibis.expr.types import Table from ibis.expr.types.groupby import GroupedTable from ibis.expr.types.tvf import WindowedTable @@ -361,7 +361,7 @@ def __contains__(self, name: str) -> bool: """ return name in self.schema() - def cast(self, schema: SupportsSchema) -> Table: + def cast(self, schema: SchemaLike) -> Table: """Cast the columns of a table. Similar to `pandas.DataFrame.astype`. @@ -438,7 +438,7 @@ def cast(self, schema: SupportsSchema) -> Table: """ return self._cast(schema, cast_method="cast") - def try_cast(self, schema: SupportsSchema) -> Table: + def try_cast(self, schema: SchemaLike) -> Table: """Cast the columns of a table. 
If the cast fails for a row, the value is returned @@ -472,7 +472,7 @@ def try_cast(self, schema: SupportsSchema) -> Table: """ return self._cast(schema, cast_method="try_cast") - def _cast(self, schema: SupportsSchema, cast_method: str = "cast") -> Table: + def _cast(self, schema: SchemaLike, cast_method: str = "cast") -> Table: schema = sch.schema(schema) cols = [] From 0275c9bcfb2905c916e0d8abbb5b78315f14a5f6 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 22 Feb 2024 13:56:30 -0500 Subject: [PATCH 3/7] feat(snowflake): allow empty url when using ibis.connect (#8428) Follow-up to #8422 to support that behavior in `ibis.connect()`. --- ibis/backends/snowflake/__init__.py | 26 +++++++++++--------- ibis/backends/snowflake/tests/test_client.py | 3 +++ 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/ibis/backends/snowflake/__init__.py b/ibis/backends/snowflake/__init__.py index 2618b66c7e9ec..473e0fb6a79dd 100644 --- a/ibis/backends/snowflake/__init__.py +++ b/ibis/backends/snowflake/__init__.py @@ -106,17 +106,21 @@ def _from_url(self, url: str, **kwargs): """ url = urlparse(url) - database, schema = url.path[1:].split("/", 1) - query_params = parse_qs(url.query) - (warehouse,) = query_params.pop("warehouse", (None,)) - connect_args = { - "user": url.username, - "password": url.password or "", - "account": url.hostname, - "warehouse": warehouse, - "database": database or "", - "schema": schema or "", - } + if url.path: + database, schema = url.path[1:].split("/", 1) + query_params = parse_qs(url.query) + (warehouse,) = query_params.pop("warehouse", (None,)) + connect_args = { + "user": url.username, + "password": url.password or "", + "account": url.hostname, + "warehouse": warehouse, + "database": database or "", + "schema": schema or "", + } + else: + connect_args = {} + query_params = {} for name, value in query_params.items(): if len(value) > 1: diff --git a/ibis/backends/snowflake/tests/test_client.py b/ibis/backends/snowflake/tests/test_client.py index 1af1f083c6e10..7f78a87eeb4df 100644 --- a/ibis/backends/snowflake/tests/test_client.py +++ b/ibis/backends/snowflake/tests/test_client.py @@ -301,3 +301,6 @@ def test_compile_does_not_make_requests(con, mocker): def test_no_argument_connection(): con = ibis.snowflake.connect() assert con.list_tables() is not None + + con = ibis.connect("snowflake://") + assert con.list_tables() is not None From 753a26854caf339cf1f6949b490f100b89d7df0c Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 22 Feb 2024 13:57:40 -0500 Subject: [PATCH 4/7] docs: clean up cursor closing and link to `Table.sql` API docs (#8417) Closes #8345. --------- Co-authored-by: Gil Forsyth --- docs/how-to/extending/sql.qmd | 28 +++++++++++++++++++--------- ibis/config.py | 2 +- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/docs/how-to/extending/sql.qmd b/docs/how-to/extending/sql.qmd index a336b5f8dc1ab..c29ba4de4afc1 100644 --- a/docs/how-to/extending/sql.qmd +++ b/docs/how-to/extending/sql.qmd @@ -34,7 +34,7 @@ t = ibis.examples.penguins.fetch(backend=con, table_name="penguins") # <2> 1. Connect to an in-memory DuckDB database 2. Read in the `penguins` example with our DuckDB database, and name it `penguins` -## `Table.sql` +## [`Table.sql`](../../reference/expression-tables.qmd#ibis.expr.types.relations.Table.sql) At the highest level there's the `Table.sql` method. 
This method allows you to run arbitrary `SELECT` statements against a table expression: @@ -119,6 +119,10 @@ another. ## `Backend.sql` +::: {.callout-tip} +## `Backend.sql` supports the `dialect` argument. +::: + There's also the `Backend.sql` method, which can handle arbitrary `SELECT` statements as well and returns an Ibis table expression. @@ -126,7 +130,7 @@ The main difference with `Table.sql` is that `Backend.sql` **can only refer to tables that already exist in the database**, because the API is defined on `Backend` instances. -After the `Backend.sql` call, however, you're able to mix and match similar +After calling `Backend.sql`, however, you're able to mix and match similar to `Table.sql`: ```{python} @@ -147,10 +151,6 @@ to `Table.sql`: ) ``` -::: {.callout-tip} -## `Backend.sql` also supports the `dialect` argument. -::: - ## `Backend.raw_sql` At the lowest level there's `Backend.raw_sql` which is for those situations @@ -161,16 +161,26 @@ modeled as a table expression. with the SQL statement's execution. ::: {.callout-caution} -## You **must** close the cursor returned from `raw_sql` to avoid leaking resources +## You may need to close the cursor returned from `raw_sql` to avoid leaking resources + +Failure to do so can result in a variety of errors and hard-to-debug behaviors. + +For DDL statements, you may not need to close the cursor since DDL statements +do not produce results. -Failure to do results in variety of errors and hard-to-debug behaviors. +Depending on the backend you may have to experiment to see when closing the +cursor is necessary. + +In most cases a cursor returned from a `SELECT` statement requires a call to +`close()`. The easiest way to do this is to use a context manager: ```{python} from contextlib import closing -with closing(con.raw_sql("CREATE TEMP TABLE my_table AS SELECT * FROM RANGE(10)")) as c: + +with closing(con.raw_sql("SELECT * FROM RANGE(10)")) as c: ... # do something with c if necessary ``` ::: diff --git a/ibis/config.py b/ibis/config.py index ecdb8a9aeef27..3544366211996 100644 --- a/ibis/config.py +++ b/ibis/config.py @@ -147,7 +147,7 @@ class Options(Config): A callable to use when logging. graphviz_repr : bool Render expressions as GraphViz PNGs when running in a Jupyter notebook. - default_backend : Optional[ibis.backends.BaseBackend], default None + default_backend : Optional[ibis.backends.BaseBackend] The default backend to use for execution, defaults to DuckDB if not set. 
context_adjustment : ContextAdjustment From 09b6adaeecf1b6388866856c795022cfca4b2679 Mon Sep 17 00:00:00 2001 From: Chloe He Date: Thu, 22 Feb 2024 16:11:02 -0800 Subject: [PATCH 5/7] fix(ir): fix window boundaries being forcefully casted (#8400) --- .../test_window/test_rows_window/out.sql | 2 +- ibis/backends/tests/test_window.py | 18 +++++++++++++++--- ibis/expr/builders.py | 13 ++++--------- ibis/expr/operations/window.py | 12 ++++++++---- 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/ibis/backends/flink/tests/snapshots/test_window/test_rows_window/out.sql b/ibis/backends/flink/tests/snapshots/test_window/test_rows_window/out.sql index 7b5bb83d8c921..c728453bd5271 100644 --- a/ibis/backends/flink/tests/snapshots/test_window/test_rows_window/out.sql +++ b/ibis/backends/flink/tests/snapshots/test_window/test_rows_window/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`f`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 1000 preceding AND CAST(0 AS SMALLINT) following) AS `Sum(f)` + SUM(`t0`.`f`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST ROWS BETWEEN 1000 preceding AND CURRENT ROW) AS `Sum(f)` FROM `table` AS `t0` \ No newline at end of file diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 21cfba79f55e8..64276745bf7ff 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -453,7 +453,7 @@ def test_grouped_bounded_following_window(backend, alltypes, df, preceding, foll @pytest.mark.parametrize( - "window_fn", + "window_fn, window_size", [ param( lambda t: ibis.window( @@ -462,6 +462,7 @@ def test_grouped_bounded_following_window(backend, alltypes, df, preceding, foll group_by=[t.string_col], order_by=[t.id], ), + 3, id="preceding-2-following-0", ), param( @@ -471,18 +472,29 @@ def test_grouped_bounded_following_window(backend, alltypes, df, preceding, foll group_by=[t.string_col], order_by=[t.id], ), + 3, id="preceding-2-following-0-tuple", ), param( lambda t: ibis.trailing_window( preceding=2, group_by=[t.string_col], order_by=[t.id] ), + 3, id="trailing-2", ), + param( + lambda t: ibis.window( + preceding=1000, following=0, group_by=[t.string_col], order_by=[t.id] + ), + 1001, + id="large-preceding-1000-following-0", + ), ], ) @pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) -def test_grouped_bounded_preceding_window(backend, alltypes, df, window_fn): +def test_grouped_bounded_preceding_window( + backend, alltypes, df, window_fn, window_size +): window = window_fn(alltypes) expr = alltypes.mutate(val=alltypes.double_col.sum().over(window)) @@ -490,7 +502,7 @@ def test_grouped_bounded_preceding_window(backend, alltypes, df, window_fn): gdf = df.sort_values("id").groupby("string_col") expected = ( df.assign( - val=gdf.double_col.rolling(3, min_periods=1) + val=gdf.double_col.rolling(window_size, min_periods=1) .sum() .sort_index(level=1) .reset_index(drop=True) diff --git a/ibis/expr/builders.py b/ibis/expr/builders.py index b15434fb948da..a1a7cde492853 100644 --- a/ibis/expr/builders.py +++ b/ibis/expr/builders.py @@ -168,17 +168,12 @@ def _table(self): else: raise IbisInputError("Window frame can only depend on a single relation") - def _maybe_cast_boundary(self, boundary, dtype): - if boundary.dtype == dtype: - return boundary - value = ops.Cast(boundary.value, dtype) - return boundary.copy(value=value) - def _maybe_cast_boundaries(self, start, end): if start and end: - dtype = dt.higher_precedence(start.dtype, end.dtype) - start = self._maybe_cast_boundary(start, dtype) - 
end = self._maybe_cast_boundary(end, dtype) + if start.dtype.is_interval() and end.dtype.is_numeric(): + return start, ops.Cast(end.value, start.dtype) + elif start.dtype.is_numeric() and end.dtype.is_interval(): + return ops.Cast(start.value, end.dtype), end return start, end def _determine_how(self, start, end): diff --git a/ibis/expr/operations/window.py b/ibis/expr/operations/window.py index b876868530327..24628153dcc2f 100644 --- a/ibis/expr/operations/window.py +++ b/ibis/expr/operations/window.py @@ -71,10 +71,14 @@ class WindowFrame(Value): shape = ds.columnar def __init__(self, start, end, **kwargs): - if start and end and start.dtype != end.dtype: - raise com.IbisTypeError( - "Window frame start and end boundaries must have the same datatype" - ) + if start and end: + if not ( + (start.dtype.is_interval() and end.dtype.is_interval()) + or (start.dtype.is_numeric() and end.dtype.is_numeric()) + ): + raise com.IbisTypeError( + "Window frame start and end boundaries must have the same datatype" + ) super().__init__(start=start, end=end, **kwargs) def dtype(self) -> dt.DataType: From b51eb2d7e7ba6e8f4cb044f235acbda4007b9957 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 23 Feb 2024 08:09:05 -0500 Subject: [PATCH 6/7] test(snowflake): xfail on test with window larger than 1000 (#8433) --- ibis/backends/tests/test_window.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 64276745bf7ff..3765dd8642f6e 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -482,11 +482,29 @@ def test_grouped_bounded_following_window(backend, alltypes, df, preceding, foll 3, id="trailing-2", ), + param( + lambda t: ibis.window( + # snowflake doesn't allow windows larger than 1000 + preceding=999, + following=0, + group_by=[t.string_col], + order_by=[t.id], + ), + 1000, + id="large-preceding-999-following-0", + ), param( lambda t: ibis.window( preceding=1000, following=0, group_by=[t.string_col], order_by=[t.id] ), 1001, + marks=[ + pytest.mark.notyet( + ["snowflake"], + raises=SnowflakeProgrammingError, + reason="Windows larger than 1000 are not allowed", + ) + ], id="large-preceding-1000-following-0", ), ], From de174a220fd24ea6b5ab9ec5b0c6aa3352d9d3aa Mon Sep 17 00:00:00 2001 From: Chloe He Date: Fri, 23 Feb 2024 10:55:37 -0800 Subject: [PATCH 7/7] fix(flink): fix compilation of over aggregation query in flink backend (#8359) --- ibis/backends/flink/compiler.py | 7 ++++ .../test_window_aggregation/out.sql | 2 +- .../test_compiler/test_window_topn/out.sql | 2 +- .../cumulate_window/out.sql | 2 +- .../test_windowing_tvf/hop_window/out.sql | 4 +- .../test_windowing_tvf/tumble_window/out.sql | 2 +- .../datetime/out.sql | 2 - .../datetime_with_microseconds/out.sql | 2 - .../string_time/out.sql | 2 - .../string_timestamp/out.sql | 2 - .../time/out.sql | 2 - .../timestamp/out.sql | 2 - .../test_window/test_range_window/out.sql | 2 +- ibis/backends/sql/dialects.py | 41 +++++++++++++++++++ 14 files changed, 56 insertions(+), 18 deletions(-) delete mode 100644 ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/datetime/out.sql delete mode 100644 ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/datetime_with_microseconds/out.sql delete mode 100644 ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/string_time/out.sql delete mode 100644 
ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/string_timestamp/out.sql delete mode 100644 ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/time/out.sql delete mode 100644 ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/timestamp/out.sql diff --git a/ibis/backends/flink/compiler.py b/ibis/backends/flink/compiler.py index dda85eadb61ca..cba5dbaf7c68f 100644 --- a/ibis/backends/flink/compiler.py +++ b/ibis/backends/flink/compiler.py @@ -143,6 +143,13 @@ def _minimize_spec(start, end, spec): and end.following ): return None + elif ( + isinstance(getattr(end, "value", None), ops.Cast) + and end.value.arg.value == 0 + and end.following + ): + spec.args["end"] = "CURRENT ROW" + spec.args["end_side"] = None return spec def visit_TumbleWindowingTVF(self, op, *, table, time_col, window_size, offset): diff --git a/ibis/backends/flink/tests/snapshots/test_compiler/test_window_aggregation/out.sql b/ibis/backends/flink/tests/snapshots/test_compiler/test_window_aggregation/out.sql index 713b7ce6343ae..5275a45bec41c 100644 --- a/ibis/backends/flink/tests/snapshots/test_compiler/test_window_aggregation/out.sql +++ b/ibis/backends/flink/tests/snapshots/test_compiler/test_window_aggregation/out.sql @@ -6,7 +6,7 @@ SELECT FROM ( SELECT `t0`.* - FROM TABLE(TUMBLE(TABLE `table`, DESCRIPTOR(`i`), INTERVAL '15' MINUTE)) AS `t0` + FROM TABLE(TUMBLE(TABLE `table`, DESCRIPTOR(`i`), INTERVAL '15' MINUTE(2))) AS `t0` ) AS `t1` GROUP BY `t1`.`window_start`, diff --git a/ibis/backends/flink/tests/snapshots/test_compiler/test_window_topn/out.sql b/ibis/backends/flink/tests/snapshots/test_compiler/test_window_topn/out.sql index fe04f27ee89cb..8c66149f1116f 100644 --- a/ibis/backends/flink/tests/snapshots/test_compiler/test_window_topn/out.sql +++ b/ibis/backends/flink/tests/snapshots/test_compiler/test_window_topn/out.sql @@ -29,7 +29,7 @@ FROM ( FROM ( SELECT `t0`.* - FROM TABLE(TUMBLE(TABLE `table`, DESCRIPTOR(`i`), INTERVAL '600' SECOND)) AS `t0` + FROM TABLE(TUMBLE(TABLE `table`, DESCRIPTOR(`i`), INTERVAL '600' SECOND(3))) AS `t0` ) AS `t1` ) AS `t2` ) AS `t3` diff --git a/ibis/backends/flink/tests/snapshots/test_compiler/test_windowing_tvf/cumulate_window/out.sql b/ibis/backends/flink/tests/snapshots/test_compiler/test_windowing_tvf/cumulate_window/out.sql index e2f0514f8925b..b1c7e6e8408ac 100644 --- a/ibis/backends/flink/tests/snapshots/test_compiler/test_windowing_tvf/cumulate_window/out.sql +++ b/ibis/backends/flink/tests/snapshots/test_compiler/test_windowing_tvf/cumulate_window/out.sql @@ -1,5 +1,5 @@ SELECT `t0`.* FROM TABLE( - CUMULATE(TABLE `table`, DESCRIPTOR(`i`), INTERVAL '10' SECOND, INTERVAL '1' MINUTE) + CUMULATE(TABLE `table`, DESCRIPTOR(`i`), INTERVAL '10' SECOND(2), INTERVAL '1' MINUTE(2)) ) AS `t0` \ No newline at end of file diff --git a/ibis/backends/flink/tests/snapshots/test_compiler/test_windowing_tvf/hop_window/out.sql b/ibis/backends/flink/tests/snapshots/test_compiler/test_windowing_tvf/hop_window/out.sql index ff64fce1ccbf9..1f8c0f37bd145 100644 --- a/ibis/backends/flink/tests/snapshots/test_compiler/test_windowing_tvf/hop_window/out.sql +++ b/ibis/backends/flink/tests/snapshots/test_compiler/test_windowing_tvf/hop_window/out.sql @@ -1,3 +1,5 @@ SELECT `t0`.* -FROM TABLE(HOP(TABLE `table`, DESCRIPTOR(`i`), INTERVAL '1' MINUTE, INTERVAL '15' MINUTE)) AS `t0` \ No newline at end of file +FROM TABLE( + HOP(TABLE `table`, DESCRIPTOR(`i`), INTERVAL '1' MINUTE(2), INTERVAL '15' MINUTE(2)) +) AS 
`t0` \ No newline at end of file diff --git a/ibis/backends/flink/tests/snapshots/test_compiler/test_windowing_tvf/tumble_window/out.sql b/ibis/backends/flink/tests/snapshots/test_compiler/test_windowing_tvf/tumble_window/out.sql index e85caed0ef7ed..2d7e9c0899f51 100644 --- a/ibis/backends/flink/tests/snapshots/test_compiler/test_windowing_tvf/tumble_window/out.sql +++ b/ibis/backends/flink/tests/snapshots/test_compiler/test_windowing_tvf/tumble_window/out.sql @@ -1,3 +1,3 @@ SELECT `t0`.* -FROM TABLE(TUMBLE(TABLE `table`, DESCRIPTOR(`i`), INTERVAL '15' MINUTE)) AS `t0` \ No newline at end of file +FROM TABLE(TUMBLE(TABLE `table`, DESCRIPTOR(`i`), INTERVAL '15' MINUTE(2))) AS `t0` \ No newline at end of file diff --git a/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/datetime/out.sql b/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/datetime/out.sql deleted file mode 100644 index bb447abd40a1d..0000000000000 --- a/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/datetime/out.sql +++ /dev/null @@ -1,2 +0,0 @@ -SELECT - CAST('2017-01-01 04:55:59' AS TIMESTAMP) AS `datetime.datetime(2017, 1, 1, 4, 55, 59)` \ No newline at end of file diff --git a/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/datetime_with_microseconds/out.sql b/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/datetime_with_microseconds/out.sql deleted file mode 100644 index d5c8151660fe6..0000000000000 --- a/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/datetime_with_microseconds/out.sql +++ /dev/null @@ -1,2 +0,0 @@ -SELECT - CAST('2017-01-01 04:55:59.001122' AS TIMESTAMP) AS `datetime.datetime(2017, 1, 1, 4, 55, 59, 1122)` \ No newline at end of file diff --git a/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/string_time/out.sql b/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/string_time/out.sql deleted file mode 100644 index 8b62e47adfdbc..0000000000000 --- a/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/string_time/out.sql +++ /dev/null @@ -1,2 +0,0 @@ -SELECT - CAST('04:55:59' AS TIMESTAMP) AS `datetime.time(4, 55, 59)` \ No newline at end of file diff --git a/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/string_timestamp/out.sql b/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/string_timestamp/out.sql deleted file mode 100644 index bb447abd40a1d..0000000000000 --- a/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/string_timestamp/out.sql +++ /dev/null @@ -1,2 +0,0 @@ -SELECT - CAST('2017-01-01 04:55:59' AS TIMESTAMP) AS `datetime.datetime(2017, 1, 1, 4, 55, 59)` \ No newline at end of file diff --git a/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/time/out.sql b/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/time/out.sql deleted file mode 100644 index 8b62e47adfdbc..0000000000000 --- a/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/time/out.sql +++ /dev/null @@ -1,2 +0,0 @@ -SELECT - CAST('04:55:59' AS TIMESTAMP) AS `datetime.time(4, 55, 59)` \ No newline at end of file diff --git a/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/timestamp/out.sql 
b/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/timestamp/out.sql deleted file mode 100644 index bb447abd40a1d..0000000000000 --- a/ibis/backends/flink/tests/snapshots/test_literals/test_literal_timestamp_or_time/timestamp/out.sql +++ /dev/null @@ -1,2 +0,0 @@ -SELECT - CAST('2017-01-01 04:55:59' AS TIMESTAMP) AS `datetime.datetime(2017, 1, 1, 4, 55, 59)` \ No newline at end of file diff --git a/ibis/backends/flink/tests/snapshots/test_window/test_range_window/out.sql b/ibis/backends/flink/tests/snapshots/test_window/test_range_window/out.sql index a739ff47435ab..a540abeb9bd59 100644 --- a/ibis/backends/flink/tests/snapshots/test_window/test_range_window/out.sql +++ b/ibis/backends/flink/tests/snapshots/test_window/test_range_window/out.sql @@ -1,3 +1,3 @@ SELECT - SUM(`t0`.`f`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST RANGE BETWEEN INTERVAL '500' MINUTE preceding AND CAST(0 AS INTERVAL MINUTE) following) AS `Sum(f)` + SUM(`t0`.`f`) OVER (ORDER BY `t0`.`f` ASC NULLS LAST RANGE BETWEEN INTERVAL '500' MINUTE(3) preceding AND CURRENT ROW) AS `Sum(f)` FROM `table` AS `t0` \ No newline at end of file diff --git a/ibis/backends/sql/dialects.py b/ibis/backends/sql/dialects.py index 45d3d8638853b..9ac2a6e3f1f5d 100644 --- a/ibis/backends/sql/dialects.py +++ b/ibis/backends/sql/dialects.py @@ -1,6 +1,7 @@ from __future__ import annotations import contextlib +import math import sqlglot.expressions as sge from sqlglot import transforms @@ -68,6 +69,45 @@ class Generator(Postgres.Generator): } +def _calculate_precision(interval_value: int) -> int: + """Calculate interval precision. + + FlinkSQL interval data types use leading precision and fractional- + seconds precision. Because the leading precision defaults to 2, we need to + specify a different precision when the value exceeds 2 digits. + + (see + https://learn.microsoft.com/en-us/sql/odbc/reference/appendixes/interval-literals) + """ + # log10(interval_value) + 1 is equivalent to len(str(interval_value)), but is significantly + # faster and more memory-efficient + if interval_value == 0: + return 0 + if interval_value < 0: + raise ValueError( + f"Expecting value to be a non-negative integer, got {interval_value}" + ) + return int(math.log10(interval_value)) + 1 + + +def _interval_with_precision(self, e): + """Format interval with precision.""" + arg = e.args["this"].this + formatted_arg = arg + with contextlib.suppress(AttributeError): + formatted_arg = arg.sql(self.dialect) + + unit = e.args["unit"] + # when formatting interval scalars, need to quote arg and add precision + if isinstance(arg, str): + formatted_arg = f"'{formatted_arg}'" + prec = _calculate_precision(int(arg)) + prec = max(prec, 2) + unit += f"({prec})" + + return f"INTERVAL {formatted_arg} {unit}" + + class Flink(Hive): class Generator(Hive.Generator): TYPE_MAPPING = Hive.Generator.TYPE_MAPPING.copy() | { @@ -91,6 +131,7 @@ class Generator(Hive.Generator): sge.DayOfYear: rename_func("dayofyear"), sge.DayOfWeek: rename_func("dayofweek"), sge.DayOfMonth: rename_func("dayofmonth"), + sge.Interval: _interval_with_precision, } class Tokenizer(Hive.Tokenizer):
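A quick way to sanity-check the interval-precision rule that this last patch adds to the Flink dialect: the sketch below re-implements `_calculate_precision` from the hunk above and applies the `max(prec, 2)` floor from `_interval_with_precision`. This is a minimal standalone sketch, not part of the patch; the loop values and unit names are illustrative, chosen to match the updated snapshot files, while the helper itself mirrors the patched code.

```python
import math


def calculate_precision(interval_value: int) -> int:
    """Mirror of ``_calculate_precision`` from the dialects.py hunk above."""
    if interval_value == 0:
        return 0
    if interval_value < 0:
        raise ValueError(
            f"Expecting value to be a non-negative integer, got {interval_value}"
        )
    # For positive integers, int(log10(v)) + 1 equals len(str(v)), i.e. the
    # number of digits, computed without allocating a string.
    return int(math.log10(interval_value)) + 1


# _interval_with_precision quotes the value and floors the precision at
# Flink's default leading precision of 2, which reproduces the interval
# literals seen in the updated snapshot files.
for value, unit in [(1, "MINUTE"), (15, "MINUTE"), (500, "MINUTE"), (600, "SECOND")]:
    assert calculate_precision(value) == len(str(value))
    prec = max(calculate_precision(value), 2)
    print(f"INTERVAL '{value}' {unit}({prec})")
# INTERVAL '1' MINUTE(2)
# INTERVAL '15' MINUTE(2)
# INTERVAL '500' MINUTE(3)
# INTERVAL '600' SECOND(3)
```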